This analysis uses ABCD Release 4.0.
This script is adapted from version 2.0 of the socioeconomic status and developmental analysis with elastic net.
# NOTE: the workspace is intentionally not cleared here. `rm(list = ls())`
# deletes every object in the calling environment, which destroys the user's
# session when this script is sourced or rendered from an interactive session,
# and would also remove path variables such as `dataFold` / `scriptfold` if
# they were defined before this point. Run the analysis in a fresh R session
# instead of resetting the workspace from inside the script.
library(tidyverse)
library(qgraph)
library(pander)
library(summarytools)
library(sjPlot)
library(sjmisc)
library(sjlabelled)
library(tidymodels)
library(knitr)
library(extrafont)
## for poisson class of elastic net
library(poissonreg)
library("sva")
### plotting libraries
library(ggtext)
library(ggpubr)
library(cowplot)
library(ggthemes)
### package for pls analysis (all packages are necessary for the model to run)
library("pls")
library("mixOmics")
library(plsmod)
Using ABCD 4.0
set up parallel
# parallel for ubuntu
#doParallel::registerDoParallel(cores=30)
### parallel library for mac
theme_set(theme_bw() + theme(panel.grid = element_blank()))
## parallel processing: number of worker cores to register
## Five physical cores are left free for the rest of the system, but at least
## one worker is always registered: detectCores() may return NA, and on a
## machine with five or fewer physical cores the subtraction alone would give
## a core count <= 0.
all_cores <- max(1, parallel::detectCores(logical = FALSE) - 5, na.rm = TRUE)
doParallel::registerDoParallel(cores = all_cores)
## this one works for ubuntu but slow
#library(doFuture)
#registerDoFuture()
#plan(multicore(workers = 30))
### parallel for windows
#library(doFuture)
#registerDoFuture()
#plan(multisession(workers = 30))
# Read the ACSPSW03 data table from `dataFold` with readr.
ACS <- dataFold %>%
  paste0("ACSPSW03_DATA_TABLE.csv") %>%
  read_csv()
## Rows: 23101 Columns: 31
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): SUBJECTKEY, SRC_SUBJECT_ID, INTERVIEW_DATE, SEX, EVENTNAME, GENETI...
## dbl (18): ACSPSW03_ID, DATASET_ID, INTERVIEW_AGE, RACE_ETHNICITY, REL_FAMILY...
## lgl (6): GENETIC_PAIRED_SUBJECTID_4, GENETIC_PI_HAT_3, GENETIC_PI_HAT_4, GE...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#knitr::kable(glimpse(ACS))
# guardian-report relationship
# Relationship of the participant in his or her family
# 0 = single; 1 = sibling; 2 = twin; 3 = triplet
# ACS %>% count(REL_RELATIONSHIP)
# Keep the participant/event identifiers, interview age, family ID and raked
# propensity score from ACS; the family ID is converted to a factor.
ACSselected <- ACS %>%
  dplyr::select(
    all_of(c(
      "SUBJECTKEY", "EVENTNAME", "INTERVIEW_AGE",
      "REL_FAMILY_ID", "ACS_RAKED_PROPENSITY_SCORE"
    ))
  ) %>%
  mutate(across(REL_FAMILY_ID, as.factor))

# Summary of the selected columns for the baseline event only.
ACSselected %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 11876 |
| Number of columns | 5 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| factor | 1 |
| numeric | 2 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| SUBJECTKEY | 0 | 1 | 12 | 16 | 0 | 11876 | 0 |
| EVENTNAME | 0 | 1 | 21 | 21 | 0 | 1 | 0 |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| REL_FAMILY_ID | 0 | 1 | FALSE | 9854 | 373: 5, 749: 4, 11: 3, 400: 3 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| INTERVIEW_AGE | 0 | 1 | 118.98 | 7.50 | 107.00 | 112.00 | 119.00 | 126.00 | 133.00 | ▇▆▆▆▆ |
| ACS_RAKED_PROPENSITY_SCORE | 0 | 1 | 691.34 | 350.98 | 161.36 | 449.35 | 619.31 | 821.74 | 1778.92 | ▅▇▂▂▁ |
### Load site and scanner information (ABCD_LT01 table) as a tibble
Siteinfo <- paste0(dataFold, "ABCD_LT01_DATA_TABLE.csv") %>%
  read.csv() %>%
  tibble::as_tibble()
vision index
# Vision screening table (ABCD_SVS01) with a derived `visionProb` flag:
# 1 when SNELLEN_VA_Y is 0 or 1, or when VIS_FLG is 2; 0 otherwise.
# The flag is NA when missing inputs leave the condition undetermined
# (e.g. SNELLEN_VA_Y is NA and VIS_FLG is not 2).
vision_idx <- paste0(dataFold, "ABCD_SVS01_DATA_TABLE.CSV") %>%
  read.csv() %>%
  as_tibble() %>%
  mutate(
    visionProb = ifelse(
      VIS_FLG == 2 | SNELLEN_VA_Y == 1 | SNELLEN_VA_Y == 0,
      1,
      0
    )
  )
# vision_idx %>% select(SNELLEN_VA_Y, VIS_FLG, visionProb) %>% arrange(SNELLEN_VA_Y)
Manually correct the wrong site IDs, following “Release Notes: Adolescent Brain Cognitive Development Study ℠ (ABCD Study ® ) Data Release 4.0 Changes and Known Issues”.
Only the baseline and two-year follow-up events, which are the ones used in this analysis, are fixed.
# Apply the manual site corrections to a copy of `Siteinfo`, then replace
# `Siteinfo` with the corrected copy. Each row of `site_fix` supplies a
# SUBJECTKEY, an EVENTNAME and the SITE_ID_L value to write for the matching
# row(s) of the site table.
Siteinfo_fixed <- Siteinfo
site_fix <- readRDS(paste0(scriptfold,'Common_psy_gene_brain_all/saved_outputs/site_fix', '.RData'))
# seq_len() makes the loop a no-op when `site_fix` has no rows; the previous
# 1:dim(site_fix)[1] would have iterated over c(1, 0) in that case.
for (i in seq_len(nrow(site_fix))) {
  fix_site_id <- site_fix$SUBJECTKEY[i]
  fix_site_event <- site_fix$EVENTNAME[i]
  fix_site <- site_fix$SITE_ID_L[i]
  # which() drops NA comparisons, so only definite matches are overwritten
  rows_to_fix <- which(
    Siteinfo_fixed$SUBJECTKEY == fix_site_id &
      Siteinfo_fixed$EVENTNAME == fix_site_event
  )
  Siteinfo_fixed$SITE_ID_L[rows_to_fix] <- fix_site
}
Siteinfo <- Siteinfo_fixed
# Column names of the task dependent variables (batch 1): five
# NIHTBX_*_UNCORRECTED scores plus PEA_RAVLT_LD_TRIAL_VII_TC.
TaskDVs1Batch <- c(
  "NIHTBX_PICVOCAB_UNCORRECTED",
  "NIHTBX_READING_UNCORRECTED",
  "NIHTBX_FLANKER_UNCORRECTED",
  "NIHTBX_PATTERN_UNCORRECTED",
  "NIHTBX_PICTURE_UNCORRECTED",
  "PEA_RAVLT_LD_TRIAL_VII_TC"
)

# Identifier columns: participant, event and site.
subj_info <- c("SUBJECTKEY", "EVENTNAME", "SITE_ID_L")
Loading up pre-computed gfactor
# From every element of `gfactor_list`, extract the named train/test output
# for the baseline and follow-up events (one list per output name).
baseline_train_gfactor <- gfactor_list %>% purrr::map("output_train_baseline")
baseline_test_gfactor <- gfactor_list %>% purrr::map("output_test_baseline")
followup_train_gfactor <- gfactor_list %>% purrr::map("output_train_followup")
followup_test_gfactor <- gfactor_list %>% purrr::map("output_test_followup")
Basically, only the sleep-related measures are good.
Not very relevant: ABCD Sum Scores Traumatic Brain Injury (abcd_tbi01); ABCD Longitudinal Summary Scores Traumatic Brain Injury (abcd_lsstbi01); ABCD Sum Scores Parent Sports and Activities Involvement (abcd_spacss01); ABCD Longitudinal Summary Scores Sports Activity (abcd_lsssa01); ABCD Sum Scores Parent Medical History (abcd_medhxss01); ABCD Longitudinal Summary Scores Medical History (abcd_lssmh01); ABCD Sum Scores Developmental History (abcd_devhxss01).
Mainly about puberty: ABCD Sum Scores Physical Health Youth (abcd_ssphy01).
Sleep scores: ABCD Parent Sleep Disturbance Scale for Children (abcd_sds01). Diet, only at the one-year follow-up: ABCD Child Nutrition Assessment (abcd_cna01). Summed sleep score + diet: ABCD Sum Scores Physical Health Parent (abcd_ssphp01).
# ABCD Parent Sleep Disturbance Scale for Children (ABCD_SDS01)
# Each question pertains to the PAST 6 MONTHS of the child's life.
# SLEEPDISTURB1_P: How many hours of sleep does your child get on most nights?
#   1 = 9-11 hours; 2 = 8-9 hours; 3 = 7-8 hours; 4 = 5-7 hours;
#   5 = Less than 5 hours
# SLEEPDISTURB2_P: How long after going to bed does your child usually fall
#   asleep?
#   1 = Less than 15 minutes; 2 = 15-30 minutes; 3 = 30-45 minutes;
#   4 = 45-60 minutes; 5 = More than 60 minutes

# Rows that agree on every column except the two record-ID columns
# (ABCD_SDS01_ID, DATASET_ID) are treated as duplicates and collapsed to one
# row; all columns are retained.
sleepDis <- paste0(dataFold, "ABCD_SDS01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble() %>%
  distinct(dplyr::select(., -ABCD_SDS01_ID, -DATASET_ID), .keep_all = TRUE)
# sleepDis %>% filter(EVENTNAME =="baseline_year_1_arm_1") %>%
# distinct(select(.,-ABCD_SDS01_ID, -DATASET_ID),.keep_all = TRUE) %>%
# arrange(SUBJECTKEY)

# Summary of the two raw items at baseline.
sleepDis %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(all_of(c("SLEEPDISTURB1_P", "SLEEPDISTURB2_P"))) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 11876 |
| Number of columns | 2 |
| _______________________ | |
| Column type frequency: | |
| numeric | 2 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| SLEEPDISTURB1_P | 5 | 1 | 1.72 | 0.81 | 1 | 1 | 2 | 2 | 5 | ▇▆▂▁▁ |
| SLEEPDISTURB2_P | 5 | 1 | 1.93 | 0.98 | 1 | 1 | 2 | 2 | 5 | ▇▇▂▁▁ |
# Summary of the two raw sleep items at the 2-year follow-up.
sleepDis %>%
  dplyr::filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(all_of(c("SLEEPDISTURB1_P", "SLEEPDISTURB2_P"))) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 10414 |
| Number of columns | 2 |
| _______________________ | |
| Column type frequency: | |
| numeric | 2 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| SLEEPDISTURB1_P | 74 | 0.99 | 1.99 | 0.87 | 1 | 1 | 2 | 2 | 5 | ▆▇▃▁▁ |
| SLEEPDISTURB2_P | 74 | 0.99 | 2.05 | 1.05 | 1 | 1 | 2 | 3 | 5 | ▇▇▃▁▁ |
# Load the ABCD_SSPHP01 table as a tibble.
PhysicalSum <- paste0(dataFold, "ABCD_SSPHP01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# Summarise every column except the first eight.
PhysicalSum %>%
  dplyr::select(-(1:8)) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 39766 |
| Number of columns | 36 |
| _______________________ | |
| Column type frequency: | |
| numeric | 36 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| SDS_P_SS_DIMS | 193 | 1.00 | 11.95 | 3.79 | 7 | 9 | 11 | 14 | 35 | ▇▃▁▁▁ |
| SDS_P_SS_DIMS_NM | 0 | 1.00 | 0.03 | 0.48 | 0 | 0 | 0 | 0 | 7 | ▇▁▁▁▁ |
| SDS_P_SS_DIMS_NT | 0 | 1.00 | 7.00 | 0.00 | 7 | 7 | 7 | 7 | 7 | ▁▁▇▁▁ |
| SDS_P_SS_SBD | 193 | 1.00 | 3.72 | 1.19 | 3 | 3 | 3 | 4 | 15 | ▇▁▁▁▁ |
| SDS_P_SS_SBD_NM | 0 | 1.00 | 0.01 | 0.21 | 0 | 0 | 0 | 0 | 3 | ▇▁▁▁▁ |
| SDS_P_SS_SBD_NT | 0 | 1.00 | 3.00 | 0.00 | 3 | 3 | 3 | 3 | 3 | ▁▁▇▁▁ |
| SDS_P_SS_DA | 192 | 1.00 | 3.35 | 0.80 | 3 | 3 | 3 | 3 | 15 | ▇▁▁▁▁ |
| SDS_P_SS_DA_NM | 0 | 1.00 | 0.01 | 0.21 | 0 | 0 | 0 | 0 | 3 | ▇▁▁▁▁ |
| SDS_P_SS_DA_NT | 0 | 1.00 | 3.00 | 0.00 | 3 | 3 | 3 | 3 | 3 | ▁▁▇▁▁ |
| SDS_P_SS_SWTD | 237 | 0.99 | 7.94 | 2.48 | 6 | 6 | 7 | 9 | 30 | ▇▁▁▁▁ |
| SDS_P_SS_SWTD_NM | 0 | 1.00 | 0.03 | 0.42 | 0 | 0 | 0 | 0 | 6 | ▇▁▁▁▁ |
| SDS_P_SS_SWTD_NT | 0 | 1.00 | 6.00 | 0.00 | 6 | 6 | 6 | 6 | 6 | ▁▁▇▁▁ |
| SDS_P_SS_DOES | 193 | 1.00 | 7.10 | 2.54 | 5 | 5 | 6 | 8 | 25 | ▇▁▁▁▁ |
| SDS_P_SS_DOES_NM | 0 | 1.00 | 0.02 | 0.35 | 0 | 0 | 0 | 0 | 5 | ▇▁▁▁▁ |
| SDS_P_SS_DOES_NT | 0 | 1.00 | 5.00 | 0.00 | 5 | 5 | 5 | 5 | 5 | ▁▁▇▁▁ |
| SDS_P_SS_SHY | 193 | 1.00 | 2.38 | 1.06 | 2 | 2 | 2 | 2 | 10 | ▇▁▁▁▁ |
| SDS_P_SS_SHY_NM | 0 | 1.00 | 0.01 | 0.14 | 0 | 0 | 0 | 0 | 2 | ▇▁▁▁▁ |
| SDS_P_SS_SHY_NT | 0 | 1.00 | 2.00 | 0.00 | 2 | 2 | 2 | 2 | 2 | ▁▁▇▁▁ |
| SDS_P_SS_TOTAL | 238 | 0.99 | 36.43 | 8.06 | 26 | 31 | 35 | 40 | 126 | ▇▁▁▁▁ |
| SDS_P_SS_TOTAL_NM | 0 | 1.00 | 0.13 | 1.80 | 0 | 0 | 0 | 0 | 26 | ▇▁▁▁▁ |
| SDS_P_SS_TOTAL_NT | 0 | 1.00 | 26.00 | 0.00 | 26 | 26 | 26 | 26 | 26 | ▁▁▇▁▁ |
| PDS_P_SS_MALE_CATEGORY | 29433 | 0.26 | 1.46 | 0.68 | 1 | 1 | 1 | 2 | 5 | ▇▃▁▁▁ |
| PDS_P_SS_MALE_CAT_NM | 29016 | 0.27 | 0.05 | 0.26 | 0 | 0 | 0 | 0 | 3 | ▇▁▁▁▁ |
| PDS_P_SS_MALE_CAT_NT | 29016 | 0.27 | 3.00 | 0.00 | 3 | 3 | 3 | 3 | 3 | ▁▁▇▁▁ |
| PDS_P_SS_FEMALE_CATEGORY | 30131 | 0.24 | 2.42 | 0.95 | 1 | 2 | 3 | 3 | 5 | ▃▃▇▂▁ |
| PDS_P_SS_FEMALE_CAT_NM | 29699 | 0.25 | 0.05 | 0.28 | 0 | 0 | 0 | 0 | 3 | ▇▁▁▁▁ |
| PDS_P_SS_FEMALE_CAT_NT | 29699 | 0.25 | 3.00 | 0.00 | 3 | 3 | 3 | 3 | 3 | ▁▁▇▁▁ |
| CNA_P_SS_SUM | 29883 | 0.25 | 8.08 | 2.45 | 0 | 6 | 8 | 10 | 14 | ▁▂▇▇▂ |
| CNA_P_SS_SUM_NM | 17825 | 0.55 | 6.98 | 6.91 | 0 | 0 | 3 | 14 | 14 | ▇▁▁▁▇ |
| CNA_P_SS_SUM_NT | 17825 | 0.55 | 14.00 | 0.00 | 14 | 14 | 14 | 14 | 14 | ▁▁▇▁▁ |
| PDS_P_SS_FEMALE_CAT_2_NM | 20807 | 0.48 | 0.08 | 0.37 | 0 | 0 | 0 | 0 | 3 | ▇▁▁▁▁ |
| PDS_P_SS_FEMALE_CAT_2_NT | 20807 | 0.48 | 3.00 | 0.00 | 3 | 3 | 3 | 3 | 3 | ▁▁▇▁▁ |
| PDS_P_SS_FEMALE_CATEGORY_2 | 21864 | 0.45 | 2.83 | 1.01 | 1 | 2 | 3 | 4 | 5 | ▂▂▇▅▁ |
| PDS_P_SS_MALE_CATEGORY_2 | 19971 | 0.50 | 1.76 | 0.88 | 1 | 1 | 2 | 2 | 5 | ▇▅▂▁▁ |
| PDS_P_SS_MALE_CAT_2_NM | 18935 | 0.52 | 0.07 | 0.36 | 0 | 0 | 0 | 0 | 3 | ▇▁▁▁▁ |
| PDS_P_SS_MALE_CAT_2_NT | 18935 | 0.52 | 3.00 | 0.00 | 3 | 3 | 3 | 3 | 3 | ▁▁▇▁▁ |
# sds_p_ss_dims
# Disorders of Initiating and Maintaining Sleep (DIMS) SUM: sleepdisturb1_p + sleepdisturb2_p + sleepdisturb3_p + sleepdisturb4_p + sleepdisturb5_p + sleepdisturb10_p + sleepdisturb11_p; Validation: All items must be answered
#
# sds_p_ss_sbd
# Sleep Breathing disorders (SBD): SUM sleepdisturb13_p + sleepdisturb14_p + sleepdisturb15_p; Validation: All items must be answered
#
# sds_p_ss_da
# Disorder of Arousal (DA) SUM: sleepdisturb17_p + sleepdisturb20_p + sleepdisturb21_p; Validation: All items must be answered
#
# sds_p_ss_swtd
# Sleep-Wake transition Disorders (SWTD) SUM: sleepdisturb6_p + sleepdisturb7_p + sleepdisturb8_p + sleepdisturb12_p + sleepdisturb18_p + sleepdisturb19_p; Validation: All items must be answered
#
# sds_p_ss_does
# Disorders of Excessive Somnolence (DOES) SUM: sleepdisturb22_p + sleepdisturb23_p + sleepdisturb24_p + sleepdisturb25_p + sleepdisturb26_p; Validation: All items must be answered
#
# sds_p_ss_shy
# Sleep Hyperhydrosis (SHY) SUM: sleepdisturb9_p + sleepdisturb16_p; Validation: All items must be answered
#
# sds_p_ss_total
# Total Score (Sum of 6 Factors): sds_p_ss_dims + sds_p_ss_sbd + sds_p_ss_da + sds_p_ss_swtd + sds_p_ss_does + sds_p_ss_shy; Validation: All items must be answered
# Combine the raw sleep items with the parent physical-health summary scores
# (full join on participant and event), keep the two raw items plus the seven
# SDS summary scores, and give them readable names.
# Note: `sleep_disturb` holds SLEEPDISTURB2_P and `sleep_hours` holds
# SLEEPDISTURB1_P; both are the raw coded item responses.
sleepSum <- sleepDis %>%
  full_join(PhysicalSum, by = c("SUBJECTKEY", "EVENTNAME")) %>%
  dplyr::select(
    SUBJECTKEY,
    EVENTNAME,
    sleep_hours = SLEEPDISTURB1_P,
    sleep_disturb = SLEEPDISTURB2_P,
    sleep_initiate_maintain = SDS_P_SS_DIMS,
    sleep_breath = SDS_P_SS_SBD,
    sleep_arousal = SDS_P_SS_DA,
    sleep_transition = SDS_P_SS_SWTD,
    sleep_somnolence = SDS_P_SS_DOES,
    sleep_hyperhydrosis = SDS_P_SS_SHY,
    sleep_total = SDS_P_SS_TOTAL
  )

# Summary of the sleep variables at baseline.
sleepSum %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 11876 |
| Number of columns | 9 |
| _______________________ | |
| Column type frequency: | |
| numeric | 9 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| sleep_hours | 5 | 1 | 1.72 | 0.81 | 1 | 1 | 2 | 2 | 5 | ▇▆▂▁▁ |
| sleep_disturb | 5 | 1 | 1.93 | 0.98 | 1 | 1 | 2 | 2 | 5 | ▇▇▂▁▁ |
| sleep_initiate_maintain | 5 | 1 | 11.76 | 3.75 | 7 | 9 | 11 | 13 | 35 | ▇▃▁▁▁ |
| sleep_breath | 5 | 1 | 3.77 | 1.26 | 3 | 3 | 3 | 4 | 15 | ▇▁▁▁▁ |
| sleep_arousal | 5 | 1 | 3.44 | 0.92 | 3 | 3 | 3 | 4 | 15 | ▇▁▁▁▁ |
| sleep_transition | 32 | 1 | 8.18 | 2.63 | 6 | 6 | 7 | 9 | 30 | ▇▁▁▁▁ |
| sleep_somnolence | 6 | 1 | 6.95 | 2.44 | 5 | 5 | 6 | 8 | 25 | ▇▁▁▁▁ |
| sleep_hyperhydrosis | 5 | 1 | 2.44 | 1.18 | 2 | 2 | 2 | 2 | 10 | ▇▁▁▁▁ |
| sleep_total | 33 | 1 | 36.54 | 8.24 | 26 | 31 | 35 | 40 | 126 | ▇▁▁▁▁ |
# Summary of the sleep variables at the 2-year follow-up.
sleepSum %>%
  dplyr::filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 10414 |
| Number of columns | 9 |
| _______________________ | |
| Column type frequency: | |
| numeric | 9 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| sleep_hours | 74 | 0.99 | 1.99 | 0.87 | 1 | 1 | 2 | 2 | 5 | ▆▇▃▁▁ |
| sleep_disturb | 74 | 0.99 | 2.05 | 1.05 | 1 | 1 | 2 | 3 | 5 | ▇▇▃▁▁ |
| sleep_initiate_maintain | 74 | 0.99 | 12.04 | 3.77 | 7 | 9 | 11 | 14 | 34 | ▇▃▁▁▁ |
| sleep_breath | 74 | 0.99 | 3.69 | 1.16 | 3 | 3 | 3 | 4 | 15 | ▇▁▁▁▁ |
| sleep_arousal | 74 | 0.99 | 3.31 | 0.72 | 3 | 3 | 3 | 3 | 11 | ▇▁▁▁▁ |
| sleep_transition | 75 | 0.99 | 7.80 | 2.43 | 6 | 6 | 7 | 9 | 28 | ▇▁▁▁▁ |
| sleep_somnolence | 74 | 0.99 | 7.14 | 2.59 | 5 | 5 | 6 | 8 | 25 | ▇▁▁▁▁ |
| sleep_hyperhydrosis | 74 | 0.99 | 2.34 | 0.99 | 2 | 2 | 2 | 2 | 10 | ▇▁▁▁▁ |
| sleep_total | 75 | 0.99 | 36.33 | 8.06 | 26 | 31 | 34 | 40 | 105 | ▇▂▁▁▁ |
ABCD Youth Screen Time Survey abcd_stq01
Youth Screen Time Survey This measure includes customized questions about the overall amount of time that the youth spends using visual media, on a typical weekday and weekend day. Media activities assessed include: (1) Watching TV shows or movies; (2) Watching videos (such as YouTube); (3) Playing video games on a computer, console, phone or other device; (4) Texting on a cell phone, tablet, or computer; (5) Visiting social networking sites like Facebook, Twitter, Instagram; (6) Video chat. Seven response options were: none, < 30 minutes, 30 minutes, 1 hour, 2 hours, 3 hours, and 4+ hours.
# Load the Youth Screen Time Survey table (ABCD_STQ01) as a tibble.
youthScreen <- paste0(dataFold, "ABCD_STQ01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()
# filter(EVENTNAME =="baseline_year_1_arm_1")

# Item wording: "On a typical weekend/weekday, how many hours do you ..."
# Coding: 0 = None; .25 = < 30 minutes; 0.5 = 30 minutes; 1 = 1 hour;
#   2 = 2 hours; 3 = 3 hours; 4 = 4+ hours
#   (example: 1½ hours would be coded as 1 hour, rather than 2 hours)
# How often do you play mature-rated video games (e.g., Call of Duty,
#   Grand Theft Auto, Assassin's Creed, etc.)?
# How often do you watch R-rated movies?
# NOTE(review): presumably SCREEN13_Y is the mature-games item and SCREEN14_Y
# the R-rated-movies item, going by the names they are given below - confirm
# against the data dictionary.

# Row-wise totals over all columns ending in WKDY_Y / WKND_Y. The sums are NA
# for any row with a missing contributing item (rowSums without na.rm).
youthScreenAdded <- youthScreen %>%
  mutate(
    wkdySum_Screen = rowSums(across(ends_with("WKDY_Y"))),
    wkndSum_Screen = rowSums(across(ends_with("WKND_Y")))
  ) %>%
  rename(
    matureGames_Screen = SCREEN13_Y,
    matureMovies_Screen = SCREEN14_Y
  )

# Identifiers plus the four derived/renamed screen-time variables.
youthScreenSum <- youthScreenAdded %>%
  dplyr::select(SUBJECTKEY, EVENTNAME, ends_with("_Screen"))

# Summary of the screen-time variables at baseline.
youthScreenSum %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 11876 |
| Number of columns | 4 |
| _______________________ | |
| Column type frequency: | |
| numeric | 4 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| matureGames_Screen | 20 | 1 | 0.57 | 0.87 | 0 | 0.00 | 0.0 | 1.00 | 3 | ▇▃▁▁▁ |
| matureMovies_Screen | 21 | 1 | 0.38 | 0.64 | 0 | 0.00 | 0.0 | 1.00 | 3 | ▇▃▁▁▁ |
| wkdySum_Screen | 38 | 1 | 3.46 | 3.10 | 0 | 1.25 | 2.5 | 4.75 | 24 | ▇▂▁▁▁ |
| wkndSum_Screen | 43 | 1 | 4.62 | 3.63 | 0 | 2.00 | 3.5 | 6.25 | 24 | ▇▃▁▁▁ |
# Summary of the screen-time variables at the 2-year follow-up.
youthScreenSum %>%
  dplyr::filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 10414 |
| Number of columns | 4 |
| _______________________ | |
| Column type frequency: | |
| numeric | 4 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| matureGames_Screen | 29 | 1 | 0.62 | 0.88 | 0 | 0 | 0 | 1 | 3 | ▇▃▁▂▁ |
| matureMovies_Screen | 37 | 1 | 0.49 | 0.65 | 0 | 0 | 0 | 1 | 3 | ▇▅▁▁▁ |
| wkdySum_Screen | 10414 | 0 | NaN | NA | NA | NA | NA | NA | NA | |
| wkndSum_Screen | 10414 | 0 | NaN | NA | NA | NA | NA | NA | NA |
# ABCD Developmental History Questionnaire (DHX01).
# The table's VISIT column is renamed to EVENTNAME, the event column name used
# by the other tables in this script.
DevHis <- paste0(dataFold, "DHX01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble() %>%
  # filter(VISIT =="baseline_year_1_arm_1") %>%
  rename(EVENTNAME = VISIT)
# glimpse(DevHis)

# Summary of all DEVHX_8* columns at baseline.
DevHis %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(starts_with("DEVHX_8")) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 11876 |
| Number of columns | 83 |
| _______________________ | |
| Column type frequency: | |
| character | 5 |
| logical | 10 |
| numeric | 68 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| DEVHX_8_RXNORM_MED1 | 0 | 1 | 0 | 190 | 10578 | 679 | 0 |
| DEVHX_8_RXNORM_MED2 | 0 | 1 | 0 | 118 | 11691 | 130 | 0 |
| DEVHX_8_RXNORM_MED3 | 0 | 1 | 0 | 79 | 11831 | 37 | 0 |
| DEVHX_8_OTHER1_NAME_OTH | 0 | 1 | 0 | 36 | 11854 | 19 | 0 |
| DEVHX_8_OTHER3_NAME_OTH | 0 | 1 | 0 | 30 | 11872 | 5 | 0 |
Variable type: logical
| skim_variable | n_missing | complete_rate | mean | count |
|---|---|---|---|---|
| DEVHX_8_PRESCRIPT_YES | 11876 | 0 | NaN | : |
| DEVHX_8_OTHER2_NAME_OTH | 11876 | 0 | NaN | : |
| DEVHX_8_OTHER4_NAME_OTH | 11876 | 0 | NaN | : |
| DEVHX_8_OTHER4_TIMES | 11876 | 0 | NaN | : |
| DEVHX_8_OTHER4_AMT | 11876 | 0 | NaN | : |
| DEVHX_8_OTHER4_UNIT | 11876 | 0 | NaN | : |
| DEVHX_8_OTHER5_NAME_OTH | 11876 | 0 | NaN | : |
| DEVHX_8_OTHER5_TIMES | 11876 | 0 | NaN | : |
| DEVHX_8_OTHER5_AMT | 11876 | 0 | NaN | : |
| DEVHX_8_OTHER5_UNIT | 11876 | 0 | NaN | : |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| DEVHX_8_PRESCRIPT_MED | 5 | 1.00 | 74.00 | 261.43 | 0.00 | 0.00 | 0.0 | 0.00 | 999 | ▇▁▁▁▁ |
| DEVHX_8_MED1_PRN | 11245 | 0.05 | 36.69 | 187.31 | 0.00 | 0.00 | 0.0 | 1.00 | 999 | ▇▁▁▁▁ |
| DEVHX_8_MED1_TIMES | 10806 | 0.09 | 1.26 | 0.75 | 0.00 | 1.00 | 1.0 | 1.00 | 10 | ▇▁▁▁▁ |
| DEVHX_8_MED1_TIMES_DK | 11718 | 0.01 | 999.00 | 0.00 | 999.00 | 999.00 | 999.0 | 999.00 | 999 | ▁▁▇▁▁ |
| DEVHX_8_MED1_HOW_MUCH | 10969 | 0.08 | 82.50 | 191.78 | 0.00 | 1.00 | 10.0 | 88.00 | 2000 | ▇▁▁▁▁ |
| DEVHX_8_MED1_HOW_MUCH_DK | 11452 | 0.04 | 999.00 | 0.00 | 999.00 | 999.00 | 999.0 | 999.00 | 999 | ▁▁▇▁▁ |
| DEVHX_8_MED1_UNIT | 11044 | 0.07 | 2.69 | 0.79 | 1.00 | 2.75 | 3.0 | 3.00 | 10 | ▃▇▁▁▁ |
| DEVHX_8_MED1_FU | 11099 | 0.07 | 0.25 | 0.43 | 0.00 | 0.00 | 0.0 | 1.00 | 1 | ▇▁▁▁▃ |
| DEVHX_8_MED2_PRN | 11709 | 0.01 | 36.19 | 186.43 | 0.00 | 0.00 | 0.0 | 1.00 | 999 | ▇▁▁▁▁ |
| DEVHX_8_MED2_TIMES | 11730 | 0.01 | 1.40 | 0.77 | 0.00 | 1.00 | 1.0 | 2.00 | 6 | ▇▂▁▁▁ |
| DEVHX_8_MED2_TIMES_DK | 11844 | 0.00 | 999.00 | 0.00 | 999.00 | 999.00 | 999.0 | 999.00 | 999 | ▁▁▇▁▁ |
| DEVHX_8_MED2_HOW_MUCH | 11782 | 0.01 | 87.58 | 247.66 | 0.00 | 1.00 | 2.5 | 50.00 | 2000 | ▇▁▁▁▁ |
| DEVHX_8_MED2_HOW_MUCH_DK | 11788 | 0.01 | 999.00 | 0.00 | 999.00 | 999.00 | 999.0 | 999.00 | 999 | ▁▁▇▁▁ |
| DEVHX_8_MED2_UNIT | 11791 | 0.01 | 2.74 | 1.09 | 1.00 | 2.00 | 3.0 | 3.00 | 10 | ▃▇▁▁▁ |
| DEVHX_8_MED2_FU | 11698 | 0.01 | 0.26 | 0.44 | 0.00 | 0.00 | 0.0 | 1.00 | 1 | ▇▁▁▁▃ |
| DEVHX_8_MED3_PRN | 11846 | 0.00 | 0.27 | 0.45 | 0.00 | 0.00 | 0.0 | 0.75 | 1 | ▇▁▁▁▃ |
| DEVHX_8_MED3_TIMES | 11819 | 0.00 | 1.30 | 0.80 | 0.00 | 1.00 | 1.0 | 1.00 | 4 | ▁▇▂▁▁ |
| DEVHX_8_MED3_TIMES_DK | 11866 | 0.00 | 999.00 | 0.00 | 999.00 | 999.00 | 999.0 | 999.00 | 999 | ▁▁▇▁▁ |
| DEVHX_8_MED3_HOW_MUCH | 11852 | 0.00 | 189.32 | 478.06 | 0.25 | 1.00 | 1.5 | 42.50 | 2000 | ▇▁▁▁▁ |
| DEVHX_8_MED3_HOW_MUCH_DK | 11853 | 0.00 | 999.00 | 0.00 | 999.00 | 999.00 | 999.0 | 999.00 | 999 | ▁▁▇▁▁ |
| DEVHX_8_MED3_UNIT | 11856 | 0.00 | 2.55 | 0.76 | 1.00 | 2.00 | 3.0 | 3.00 | 3 | ▂▁▂▁▇ |
| DEVHX_8_TOBACCO | 6 | 1.00 | 23.11 | 149.74 | 0.00 | 0.00 | 0.0 | 0.00 | 999 | ▇▁▁▁▁ |
| DEVHX_8_CIGS_PER_DAY | 10453 | 0.12 | 8.35 | 6.44 | 0.00 | 4.00 | 6.0 | 10.00 | 80 | ▇▁▁▁▁ |
| DEVHX_8_CIGS_PER_DAY_DK | 11689 | 0.02 | 999.00 | 0.00 | 999.00 | 999.00 | 999.0 | 999.00 | 999 | ▁▁▇▁▁ |
| DEVHX_8_ALCOHOL | 6 | 1.00 | 57.22 | 231.63 | 0.00 | 0.00 | 0.0 | 1.00 | 999 | ▇▁▁▁▁ |
| DEVHX_8_ALCHOHOL_MAX | 9334 | 0.21 | 2.40 | 1.50 | 0.00 | 1.00 | 2.0 | 3.00 | 20 | ▇▁▁▁▁ |
| DEVHX_8_ALCHOHOL_MAX_DK | 11536 | 0.03 | 999.00 | 0.00 | 999.00 | 999.00 | 999.0 | 999.00 | 999 | ▁▁▇▁▁ |
| DEVHX_8_ALCHOHOL_AVG | 9369 | 0.21 | 3.97 | 4.38 | 0.00 | 1.00 | 3.0 | 5.00 | 52 | ▇▁▁▁▁ |
| DEVHX_8_ALCHOHOL_AVG_DK | 11501 | 0.03 | 999.00 | 0.00 | 999.00 | 999.00 | 999.0 | 999.00 | 999 | ▁▁▇▁▁ |
| DEVHX_8_ALCOHOL_EFFECTS | 9512 | 0.20 | 2.08 | 1.24 | 0.00 | 1.00 | 2.0 | 2.00 | 20 | ▇▁▁▁▁ |
| DEVHX_8_ALCOHOL_EFFECTS_DK | 11358 | 0.04 | 999.00 | 0.00 | 999.00 | 999.00 | 999.0 | 999.00 | 999 | ▁▁▇▁▁ |
| DEVHX_8_MARIJUANA | 5 | 1.00 | 28.25 | 165.43 | 0.00 | 0.00 | 0.0 | 0.00 | 999 | ▇▁▁▁▁ |
| DEVHX_8_MARIJUANA_AMT | 11387 | 0.04 | 2.09 | 2.49 | 0.00 | 1.00 | 2.0 | 3.00 | 40 | ▇▁▁▁▁ |
| DEVHX_8_MARIJUANA_AMT_DK | 11685 | 0.02 | 999.00 | 0.00 | 999.00 | 999.00 | 999.0 | 999.00 | 999 | ▁▁▇▁▁ |
| DEVHX_8_COC_CRACK | 5 | 1.00 | 23.74 | 152.14 | 0.00 | 0.00 | 0.0 | 0.00 | 999 | ▇▁▁▁▁ |
| DEVHX_8_COC_CRACK_AMT | 11843 | 0.00 | 3.00 | 3.71 | 0.00 | 1.00 | 2.0 | 3.00 | 20 | ▇▁▁▁▁ |
| DEVHX_8_COC_CRACK_AMT_DK | 11815 | 0.01 | 999.00 | 0.00 | 999.00 | 999.00 | 999.0 | 999.00 | 999 | ▁▁▇▁▁ |
| DEVHX_8_HER_MORPH | 5 | 1.00 | 24.41 | 154.23 | 0.00 | 0.00 | 0.0 | 0.00 | 999 | ▇▁▁▁▁ |
| DEVHX_8_HER_MORPH_AMT | 11873 | 0.00 | 2.67 | 1.15 | 2.00 | 2.00 | 2.0 | 3.00 | 4 | ▇▁▁▁▃ |
| DEVHX_8_HER_MORPH_AMT_DK | 11855 | 0.00 | 999.00 | 0.00 | 999.00 | 999.00 | 999.0 | 999.00 | 999 | ▁▁▇▁▁ |
| DEVHX_8_OXYCONT | 5 | 1.00 | 25.08 | 156.29 | 0.00 | 0.00 | 0.0 | 0.00 | 999 | ▇▁▁▁▁ |
| DEVHX_8_OXYCONT_AMT | 11861 | 0.00 | 2.13 | 1.19 | 0.00 | 1.00 | 2.0 | 3.00 | 4 | ▂▇▇▇▃ |
| DEVHX_8_OXYCONT_AMT_DK | 11849 | 0.00 | 999.00 | 0.00 | 999.00 | 999.00 | 999.0 | 999.00 | 999 | ▁▁▇▁▁ |
| DEVHX_8_OTHER_DRUGS | 5 | 1.00 | 32.07 | 176.08 | 0.00 | 0.00 | 0.0 | 0.00 | 999 | ▇▁▁▁▁ |
| DEVHX_8_OTHER1_NAME_2 | 11779 | 0.01 | 46.53 | 198.62 | 0.00 | 1.00 | 3.0 | 12.00 | 999 | ▇▁▁▁▁ |
| DEVHX_8_OTHER1_TIMES | 11829 | 0.00 | 1.94 | 1.98 | 0.00 | 1.00 | 1.0 | 2.00 | 10 | ▇▂▁▁▁ |
| DEVHX_8_OTHER1_TIMES_DK | 11824 | 0.00 | 999.00 | 0.00 | 999.00 | 999.00 | 999.0 | 999.00 | 999 | ▁▁▇▁▁ |
| DEVHX_8_OTHER1_AMT | 11845 | 0.00 | 92.94 | 286.57 | 0.00 | 1.00 | 2.0 | 16.25 | 1500 | ▇▁▁▁▁ |
| DEVHX_8_OTHER1_AMT_DK | 11808 | 0.01 | 999.00 | 0.00 | 999.00 | 999.00 | 999.0 | 999.00 | 999 | ▁▁▇▁▁ |
| DEVHX_8_OTHER1_UNIT | 11846 | 0.00 | 3.20 | 2.11 | 1.00 | 1.25 | 3.0 | 3.00 | 7 | ▆▇▁▁▃ |
| DEVHX_8_OTHER2_NAME_2 | 11797 | 0.01 | 13.77 | 112.32 | 0.00 | 0.00 | 0.0 | 0.00 | 999 | ▇▁▁▁▁ |
| DEVHX_8_OTHER2_TIMES | 11871 | 0.00 | 1.40 | 0.89 | 1.00 | 1.00 | 1.0 | 1.00 | 3 | ▇▁▁▁▂ |
| DEVHX_8_OTHER2_TIMES_DK | 11869 | 0.00 | 999.00 | 0.00 | 999.00 | 999.00 | 999.0 | 999.00 | 999 | ▁▁▇▁▁ |
| DEVHX_8_OTHER2_AMT | 11874 | 0.00 | 6.00 | 2.83 | 4.00 | 5.00 | 6.0 | 7.00 | 8 | ▇▁▁▁▇ |
| DEVHX_8_OTHER2_AMT_DK | 11866 | 0.00 | 999.00 | 0.00 | 999.00 | 999.00 | 999.0 | 999.00 | 999 | ▁▁▇▁▁ |
| DEVHX_8_OTHER2_UNIT | 11874 | 0.00 | 5.00 | 2.83 | 3.00 | 4.00 | 5.0 | 6.00 | 7 | ▇▁▁▁▇ |
| DEVHX_8_OTHER3_NAME_2 | 11868 | 0.00 | 3.00 | 5.26 | 0.00 | 0.00 | 0.0 | 3.50 | 12 | ▇▁▁▁▂ |
| DEVHX_8_OTHER3_TIMES | 11874 | 0.00 | 2.00 | 1.41 | 1.00 | 1.50 | 2.0 | 2.50 | 3 | ▇▁▁▁▇ |
| DEVHX_8_OTHER3_TIMES_DK | 11875 | 0.00 | 999.00 | NA | 999.00 | 999.00 | 999.0 | 999.00 | 999 | ▁▁▇▁▁ |
| DEVHX_8_OTHER3_AMT | 11875 | 0.00 | 10.00 | NA | 10.00 | 10.00 | 10.0 | 10.00 | 10 | ▁▁▇▁▁ |
| DEVHX_8_OTHER3_AMT_DK | 11874 | 0.00 | 999.00 | 0.00 | 999.00 | 999.00 | 999.0 | 999.00 | 999 | ▁▁▇▁▁ |
| DEVHX_8_OTHER3_UNIT | 11875 | 0.00 | 3.00 | NA | 3.00 | 3.00 | 3.0 | 3.00 | 3 | ▁▁▇▁▁ |
| DEVHX_8_OTHER4_NAME_2 | 11873 | 0.00 | 0.33 | 0.58 | 0.00 | 0.00 | 0.0 | 0.50 | 1 | ▇▁▁▁▃ |
| DEVHX_8_OTHER4_TIMES_DK | 11875 | 0.00 | 999.00 | NA | 999.00 | 999.00 | 999.0 | 999.00 | 999 | ▁▁▇▁▁ |
| DEVHX_8_OTHER4_AMT_DK | 11875 | 0.00 | 999.00 | NA | 999.00 | 999.00 | 999.0 | 999.00 | 999 | ▁▁▇▁▁ |
| DEVHX_8_OTHER5_NAME_2 | 11875 | 0.00 | 1.00 | NA | 1.00 | 1.00 | 1.0 | 1.00 | 1 | ▁▁▇▁▁ |
| DEVHX_8_OTHER5_TIMES_DK | 11875 | 0.00 | 999.00 | NA | 999.00 | 999.00 | 999.0 | 999.00 | 999 | ▁▁▇▁▁ |
| DEVHX_8_OTHER5_AMT_DK | 11875 | 0.00 | 999.00 | NA | 999.00 | 999.00 | 999.0 | 999.00 | 999 | ▁▁▇▁▁ |
# Summary of all DEVHX_8* columns at the 2-year follow-up.
DevHis %>%
  dplyr::filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(starts_with("DEVHX_8")) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 0 |
| Number of columns | 83 |
| _______________________ | |
| Column type frequency: | |
| character | 5 |
| logical | 10 |
| numeric | 68 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| DEVHX_8_RXNORM_MED1 | 0 | NaN | NA | NA | 0 | 0 | 0 |
| DEVHX_8_RXNORM_MED2 | 0 | NaN | NA | NA | 0 | 0 | 0 |
| DEVHX_8_RXNORM_MED3 | 0 | NaN | NA | NA | 0 | 0 | 0 |
| DEVHX_8_OTHER1_NAME_OTH | 0 | NaN | NA | NA | 0 | 0 | 0 |
| DEVHX_8_OTHER3_NAME_OTH | 0 | NaN | NA | NA | 0 | 0 | 0 |
Variable type: logical
| skim_variable | n_missing | complete_rate | mean | count |
|---|---|---|---|---|
| DEVHX_8_PRESCRIPT_YES | 0 | NaN | NaN | : |
| DEVHX_8_OTHER2_NAME_OTH | 0 | NaN | NaN | : |
| DEVHX_8_OTHER4_NAME_OTH | 0 | NaN | NaN | : |
| DEVHX_8_OTHER4_TIMES | 0 | NaN | NaN | : |
| DEVHX_8_OTHER4_AMT | 0 | NaN | NaN | : |
| DEVHX_8_OTHER4_UNIT | 0 | NaN | NaN | : |
| DEVHX_8_OTHER5_NAME_OTH | 0 | NaN | NaN | : |
| DEVHX_8_OTHER5_TIMES | 0 | NaN | NaN | : |
| DEVHX_8_OTHER5_AMT | 0 | NaN | NaN | : |
| DEVHX_8_OTHER5_UNIT | 0 | NaN | NaN | : |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| DEVHX_8_PRESCRIPT_MED | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_MED1_PRN | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_MED1_TIMES | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_MED1_TIMES_DK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_MED1_HOW_MUCH | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_MED1_HOW_MUCH_DK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_MED1_UNIT | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_MED1_FU | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_MED2_PRN | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_MED2_TIMES | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_MED2_TIMES_DK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_MED2_HOW_MUCH | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_MED2_HOW_MUCH_DK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_MED2_UNIT | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_MED2_FU | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_MED3_PRN | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_MED3_TIMES | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_MED3_TIMES_DK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_MED3_HOW_MUCH | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_MED3_HOW_MUCH_DK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_MED3_UNIT | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_TOBACCO | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_CIGS_PER_DAY | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_CIGS_PER_DAY_DK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_ALCOHOL | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_ALCHOHOL_MAX | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_ALCHOHOL_MAX_DK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_ALCHOHOL_AVG | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_ALCHOHOL_AVG_DK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_ALCOHOL_EFFECTS | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_ALCOHOL_EFFECTS_DK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_MARIJUANA | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_MARIJUANA_AMT | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_MARIJUANA_AMT_DK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_COC_CRACK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_COC_CRACK_AMT | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_COC_CRACK_AMT_DK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_HER_MORPH | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_HER_MORPH_AMT | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_HER_MORPH_AMT_DK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OXYCONT | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OXYCONT_AMT | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OXYCONT_AMT_DK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER_DRUGS | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER1_NAME_2 | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER1_TIMES | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER1_TIMES_DK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER1_AMT | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER1_AMT_DK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER1_UNIT | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER2_NAME_2 | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER2_TIMES | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER2_TIMES_DK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER2_AMT | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER2_AMT_DK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER2_UNIT | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER3_NAME_2 | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER3_TIMES | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER3_TIMES_DK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER3_AMT | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER3_AMT_DK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER3_UNIT | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER4_NAME_2 | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER4_TIMES_DK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER4_AMT_DK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER5_NAME_2 | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER5_TIMES_DK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_8_OTHER5_AMT_DK | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA |
#devhx_8_tobacco
#Before knowing of pregnancy. Tobacco? How many times per day? / ¿Cuántas veces al día?
#devhx_9_tobacco
#Knowing of pregnancy. Tobacco? How many times per day? / ¿Cuántas veces al día?
#devhx_8_alcohol
#Before knowing of pregnancy. Alcohol? / ¿Alcohol?
#devhx_9_alcohol
#Knowing of pregnancy. Alcohol? / ¿Alcohol?
#devhx_8_marijuana
#Before knowing of pregnancy. Marijuana? / ¿Marihuana?
#devhx_9_marijuana
#Knowing of pregnancy. Marijuana? / ¿Marihuana?
# Maternal substance-use items: replace the 999 code with NA in every numeric
# column, then expose the six tobacco/alcohol/marijuana items as factors under
# readable "*_preg" names.
momSubstanceUse <- DevHis %>%
  # across(where()) replaces the superseded scoped verb mutate_if()
  mutate(across(where(is.numeric), ~ na_if(.x, 999))) %>%
  mutate(
    tobacco_before_preg = as.factor(DEVHX_8_TOBACCO),
    tobacco_after_preg = as.factor(DEVHX_9_TOBACCO),
    alcohol_before_preg = as.factor(DEVHX_8_ALCOHOL),
    alcohol_after_preg = as.factor(DEVHX_9_ALCOHOL),
    marijuana_before_preg = as.factor(DEVHX_8_MARIJUANA),
    marijuana_after_preg = as.factor(DEVHX_9_MARIJUANA)
  ) %>%
  # keep the identifiers plus every column whose name ends in "_preg"
  dplyr::select(SUBJECTKEY, EVENTNAME, ends_with("_preg")) %>%
  droplevels()
# Baseline summary of the substance-use factors (identifier columns dropped).
momSubstanceUse %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 11876 |
| Number of columns | 6 |
| _______________________ | |
| Column type frequency: | |
| factor | 6 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| tobacco_before_preg | 279 | 0.98 | FALSE | 2 | 0: 9987, 1: 1610 |
| tobacco_after_preg | 265 | 0.98 | FALSE | 2 | 0: 10991, 1: 620 |
| alcohol_before_preg | 683 | 0.94 | FALSE | 2 | 0: 8311, 1: 2882 |
| alcohol_after_preg | 293 | 0.98 | FALSE | 2 | 0: 11268, 1: 315 |
| marijuana_before_preg | 340 | 0.97 | FALSE | 2 | 0: 10849, 1: 687 |
| marijuana_after_preg | 277 | 0.98 | FALSE | 2 | 0: 11354, 1: 245 |
# Same summary for the 2-year follow-up event
# (the rendered output below reports 0 rows for this event).
momSubstanceUse %>%
  filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 0 |
| Number of columns | 6 |
| _______________________ | |
| Column type frequency: | |
| factor | 6 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| tobacco_before_preg | 0 | NaN | FALSE | 0 | 0: 0, 1: 0 |
| tobacco_after_preg | 0 | NaN | FALSE | 0 | 0: 0, 1: 0 |
| alcohol_before_preg | 0 | NaN | FALSE | 0 | 0: 0, 1: 0 |
| alcohol_after_preg | 0 | NaN | FALSE | 0 | 0: 0, 1: 0 |
| marijuana_before_preg | 0 | NaN | FALSE | 0 | 0: 0, 1: 0 |
| marijuana_after_preg | 0 | NaN | FALSE | 0 | 0: 0, 1: 0 |
# Baseline summary of the raw birth-weight and DEVHX_10/12/13/14 columns,
# selected by name prefix (999 codes are still present at this point).
DevHis %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(
    starts_with(
      c("BIRTH_WEIGHT", "DEVHX_10", "DEVHX_12", "DEVHX_13", "DEVHX_14")
    )
  ) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 11876 |
| Number of columns | 30 |
| _______________________ | |
| Column type frequency: | |
| logical | 3 |
| numeric | 27 |
| ________________________ | |
| Group variables | None |
Variable type: logical
| skim_variable | n_missing | complete_rate | mean | count |
|---|---|---|---|---|
| DEVHX_10C3_C_P | 11876 | 0 | NaN | : |
| DEVHX_10C3_A_P | 11876 | 0 | NaN | : |
| DEVHX_10C3_B_P | 11876 | 0 | NaN | : |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| BIRTH_WEIGHT_LBS | 517 | 0.96 | 6.59 | 1.47 | 1 | 6 | 7 | 8 | 14 | ▁▆▇▁▁ |
| BIRTH_WEIGHT_OZ | 1241 | 0.90 | 7.16 | 4.35 | 0 | 4 | 7 | 11 | 15 | ▇▇▇▆▅ |
| DEVHX_10 | 9 | 1.00 | 39.22 | 191.65 | -1 | 1 | 1 | 1 | 999 | ▇▁▁▁▁ |
| DEVHX_10A3_P | 4 | 1.00 | 32.45 | 176.72 | 0 | 0 | 0 | 0 | 999 | ▇▁▁▁▁ |
| DEVHX_10B3_P | 4 | 1.00 | 27.22 | 162.52 | 0 | 0 | 0 | 0 | 999 | ▇▁▁▁▁ |
| DEVHX_10C3_P | 4 | 1.00 | 32.47 | 176.95 | 0 | 0 | 0 | 0 | 999 | ▇▁▁▁▁ |
| DEVHX_10D3_P | 4 | 1.00 | 27.95 | 164.71 | 0 | 0 | 0 | 0 | 999 | ▇▁▁▁▁ |
| DEVHX_10E3_P | 4 | 1.00 | 34.17 | 181.56 | 0 | 0 | 0 | 0 | 999 | ▇▁▁▁▁ |
| DEVHX_10F3_P | 4 | 1.00 | 26.26 | 159.81 | 0 | 0 | 0 | 0 | 999 | ▇▁▁▁▁ |
| DEVHX_10G3_P | 4 | 1.00 | 31.43 | 174.27 | 0 | 0 | 0 | 0 | 999 | ▇▁▁▁▁ |
| DEVHX_10H3_P | 4 | 1.00 | 39.29 | 193.99 | 0 | 0 | 0 | 0 | 999 | ▇▁▁▁▁ |
| DEVHX_10I3_P | 4 | 1.00 | 30.78 | 172.45 | 0 | 0 | 0 | 0 | 999 | ▇▁▁▁▁ |
| DEVHX_10J3_P | 4 | 1.00 | 33.92 | 180.68 | 0 | 0 | 0 | 0 | 999 | ▇▁▁▁▁ |
| DEVHX_10K3_P | 4 | 1.00 | 29.99 | 170.38 | 0 | 0 | 0 | 0 | 999 | ▇▁▁▁▁ |
| DEVHX_10L3_P | 4 | 1.00 | 26.61 | 160.80 | 0 | 0 | 0 | 0 | 999 | ▇▁▁▁▁ |
| DEVHX_10M3_P | 4 | 1.00 | 28.95 | 167.33 | 0 | 0 | 0 | 0 | 999 | ▇▁▁▁▁ |
| DEVHX_12A_P | 5 | 1.00 | 11.97 | 107.83 | 0 | 0 | 0 | 0 | 999 | ▇▁▁▁▁ |
| DEVHX_12_P | 9668 | 0.19 | 17.10 | 109.30 | 1 | 3 | 4 | 6 | 999 | ▇▁▁▁▁ |
| DEVHX_13_3_P | 5 | 1.00 | 12.58 | 109.70 | 0 | 0 | 0 | 1 | 999 | ▇▁▁▁▁ |
| DEVHX_14A3_P | 5 | 1.00 | 26.71 | 161.06 | 0 | 0 | 0 | 0 | 999 | ▇▁▁▁▁ |
| DEVHX_14B3_P | 5 | 1.00 | 28.56 | 166.39 | 0 | 0 | 0 | 0 | 999 | ▇▁▁▁▁ |
| DEVHX_14C3_P | 5 | 1.00 | 25.04 | 156.03 | 0 | 0 | 0 | 0 | 999 | ▇▁▁▁▁ |
| DEVHX_14D3_P | 5 | 1.00 | 18.43 | 134.44 | 0 | 0 | 0 | 0 | 999 | ▇▁▁▁▁ |
| DEVHX_14E3_P | 5 | 1.00 | 24.23 | 153.16 | 0 | 0 | 0 | 0 | 999 | ▇▁▁▁▁ |
| DEVHX_14F3_P | 5 | 1.00 | 25.09 | 156.02 | 0 | 0 | 0 | 0 | 999 | ▇▁▁▁▁ |
| DEVHX_14G3_P | 5 | 1.00 | 17.42 | 130.77 | 0 | 0 | 0 | 0 | 999 | ▇▁▁▁▁ |
| DEVHX_14H3_P | 6 | 1.00 | 34.36 | 182.00 | 0 | 0 | 0 | 0 | 999 | ▇▁▁▁▁ |
# Same column selection for the 2-year follow-up event
# (the rendered output below reports 0 rows for this event).
DevHis %>%
  filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(
    starts_with(
      c("BIRTH_WEIGHT", "DEVHX_10", "DEVHX_12", "DEVHX_13", "DEVHX_14")
    )
  ) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 0 |
| Number of columns | 30 |
| _______________________ | |
| Column type frequency: | |
| logical | 3 |
| numeric | 27 |
| ________________________ | |
| Group variables | None |
Variable type: logical
| skim_variable | n_missing | complete_rate | mean | count |
|---|---|---|---|---|
| DEVHX_10C3_C_P | 0 | NaN | NaN | : |
| DEVHX_10C3_A_P | 0 | NaN | NaN | : |
| DEVHX_10C3_B_P | 0 | NaN | NaN | : |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| BIRTH_WEIGHT_LBS | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| BIRTH_WEIGHT_OZ | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_10 | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_10A3_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_10B3_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_10C3_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_10D3_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_10E3_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_10F3_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_10G3_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_10H3_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_10I3_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_10J3_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_10K3_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_10L3_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_10M3_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_12A_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_12_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_13_3_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_14A3_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_14B3_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_14C3_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_14D3_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_14E3_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_14F3_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_14G3_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DEVHX_14H3_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA |
#devhx_12a_p
#Was the child born prematurely? / ¿Nació el niño o la niña antes de tiempo?
# Developmental adversity summary: a prematurity factor plus row-wise sums of
# the birth-complication and pregnancy-complication items.
# Numeric 999 codes become NA first; rowSums() is called without na.rm, so a
# row with any NA item gets an NA sum.
# NOTE(review): the "deveplopment_" spelling is kept as-is because the final
# select() matches on that prefix (downstream code presumably does too).
adversitySum <- DevHis %>%
  # across(where()) replaces the superseded scoped verb mutate_if()
  mutate(across(where(is.numeric), ~ na_if(.x, 999))) %>%
  mutate(
    deveplopment_prematurity = as.factor(DEVHX_12A_P),
    # every column whose name starts with "DEVHX_14"
    deveplopment_birth_complications =
      rowSums(dplyr::select(., starts_with("DEVHX_14"))),
    #deveplopment_birth_kg = BIRTH_WEIGHT_LBS*0.453592, #all na???
    # Positional range: depends on the column order of DevHis and ends at
    # DEVHX_10L3_P (DEVHX_10M3_P appears to be left out -- TODO confirm).
    deveplopment_pregnancy_complications =
      rowSums(dplyr::select(., DEVHX_10A3_P:DEVHX_10L3_P))
  ) %>%
  dplyr::select(SUBJECTKEY, EVENTNAME, starts_with("deveplopment_"))
# Baseline summary of the adversity scores (identifier columns dropped).
adversitySum %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 11876 |
| Number of columns | 3 |
| _______________________ | |
| Column type frequency: | |
| factor | 1 |
| numeric | 2 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| deveplopment_prematurity | 145 | 0.99 | FALSE | 2 | 0: 9523, 1: 2208 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| deveplopment_birth_complications | 760 | 0.94 | 0.37 | 0.75 | 0 | 0 | 0 | 1 | 8 | ▇▁▁▁▁ |
| deveplopment_pregnancy_complications | 743 | 0.94 | 0.61 | 1.02 | 0 | 0 | 0 | 1 | 12 | ▇▁▁▁▁ |
# Same summary for the 2-year follow-up event
# (the rendered output below reports 0 rows for this event).
adversitySum %>%
  filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 0 |
| Number of columns | 3 |
| _______________________ | |
| Column type frequency: | |
| factor | 1 |
| numeric | 2 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| deveplopment_prematurity | 0 | NaN | FALSE | 0 | 0: 0, 1: 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| deveplopment_birth_complications | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| deveplopment_pregnancy_complications | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA |
Most events are quite rare.
# Load the ABCD_OTBI01 table as a tibble.
brainTruma <- paste0(dataFold, "ABCD_OTBI01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# Baseline summary of everything after the first eight columns.
brainTruma %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(-(1:8)) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 11876 |
| Number of columns | 39 |
| _______________________ | |
| Column type frequency: | |
| logical | 3 |
| numeric | 36 |
| ________________________ | |
| Group variables | None |
Variable type: logical
| skim_variable | n_missing | complete_rate | mean | count |
|---|---|---|---|---|
| TBI_8I | 11876 | 0 | NaN | : |
| TBI_8K | 11876 | 0 | NaN | : |
| TBI_8L | 11876 | 0 | NaN | : |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| TBI_SELECT_LANGUAGE___1 | 0 | 1.00 | 0.05 | 0.23 | 0.0 | 0.00 | 0.0 | 0.00 | 1 | ▇▁▁▁▁ |
| TBI_1 | 4 | 1.00 | 0.12 | 0.33 | 0.0 | 0.00 | 0.0 | 0.00 | 1 | ▇▁▁▁▁ |
| TBI_1B | 10434 | 0.12 | 0.07 | 0.28 | 0.0 | 0.00 | 0.0 | 0.00 | 3 | ▇▁▁▁▁ |
| TBI_1C | 10436 | 0.12 | 0.17 | 0.37 | 0.0 | 0.00 | 0.0 | 0.00 | 1 | ▇▁▁▁▂ |
| TBI_1D | 10435 | 0.12 | 4.98 | 2.63 | 0.0 | 3.00 | 5.0 | 7.00 | 11 | ▇▇▇▇▃ |
| TBI_2 | 4 | 1.00 | 0.02 | 0.12 | 0.0 | 0.00 | 0.0 | 0.00 | 1 | ▇▁▁▁▁ |
| TBI_2B | 11688 | 0.02 | 0.04 | 0.19 | 0.0 | 0.00 | 0.0 | 0.00 | 1 | ▇▁▁▁▁ |
| TBI_2C | 11688 | 0.02 | 0.12 | 0.33 | 0.0 | 0.00 | 0.0 | 0.00 | 1 | ▇▁▁▁▁ |
| TBI_2D | 11688 | 0.02 | 6.61 | 2.18 | 0.0 | 5.00 | 7.0 | 8.00 | 10 | ▁▂▅▇▃ |
| TBI_3 | 4 | 1.00 | 0.12 | 0.33 | 0.0 | 0.00 | 0.0 | 0.00 | 1 | ▇▁▁▁▁ |
| TBI_3B | 10396 | 0.12 | 0.04 | 0.21 | 0.0 | 0.00 | 0.0 | 0.00 | 3 | ▇▁▁▁▁ |
| TBI_3C | 10397 | 0.12 | 0.16 | 0.37 | 0.0 | 0.00 | 0.0 | 0.00 | 1 | ▇▁▁▁▂ |
| TBI_3D | 10398 | 0.12 | 5.86 | 2.70 | 0.0 | 4.00 | 6.0 | 8.00 | 11 | ▅▅▆▇▅ |
| TBI_4 | 4 | 1.00 | 0.00 | 0.07 | 0.0 | 0.00 | 0.0 | 0.00 | 1 | ▇▁▁▁▁ |
| TBI_4B | 11817 | 0.00 | 0.02 | 0.13 | 0.0 | 0.00 | 0.0 | 0.00 | 1 | ▇▁▁▁▁ |
| TBI_4C | 11817 | 0.00 | 0.17 | 0.38 | 0.0 | 0.00 | 0.0 | 0.00 | 1 | ▇▁▁▁▂ |
| TBI_4D | 11817 | 0.00 | 6.53 | 2.44 | 0.0 | 5.00 | 7.0 | 8.00 | 10 | ▂▂▅▇▅ |
| TBI_5 | 4 | 1.00 | 0.00 | 0.03 | 0.0 | 0.00 | 0.0 | 0.00 | 1 | ▇▁▁▁▁ |
| TBI_5B | 11866 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.0 | 0.00 | 0 | ▁▁▇▁▁ |
| TBI_5C | 11866 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.0 | 0.00 | 0 | ▁▁▇▁▁ |
| TBI_5D | 11866 | 0.00 | 5.10 | 2.91 | 1.5 | 2.25 | 5.5 | 7.75 | 9 | ▇▁▃▂▆ |
| TBI_6O | 4 | 1.00 | 0.00 | 0.03 | 0.0 | 0.00 | 0.0 | 0.00 | 1 | ▇▁▁▁▁ |
| TBI_6P | 11867 | 0.00 | 1.67 | 1.12 | 1.0 | 1.00 | 1.0 | 2.00 | 4 | ▇▁▁▁▁ |
| TBI_6Q | 11867 | 0.00 | 1.78 | 1.20 | 0.0 | 1.00 | 2.0 | 2.00 | 4 | ▂▇▇▂▂ |
| TBI_6R | 11867 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.0 | 0.00 | 0 | ▁▁▇▁▁ |
| TBI_6S | 11867 | 0.00 | 4.56 | 3.71 | 1.0 | 1.00 | 3.0 | 8.00 | 10 | ▇▂▂▂▃ |
| TBI_7A | 4 | 1.00 | 0.01 | 0.09 | 0.0 | 0.00 | 0.0 | 0.00 | 1 | ▇▁▁▁▁ |
| TBI_7C1 | 11769 | 0.01 | 0.07 | 0.37 | 0.0 | 0.00 | 0.0 | 0.00 | 3 | ▇▁▁▁▁ |
| TBL_7C2 | 11773 | 0.01 | 0.08 | 0.27 | 0.0 | 0.00 | 0.0 | 0.00 | 1 | ▇▁▁▁▁ |
| TBI_7E | 11769 | 0.01 | 4.50 | 3.63 | 0.0 | 1.00 | 5.0 | 8.00 | 10 | ▇▁▂▅▃ |
| TBI_7F | 11769 | 0.01 | 5.62 | 4.11 | 0.0 | 0.50 | 8.0 | 9.00 | 10 | ▆▂▁▂▇ |
| TBI_7G | 11769 | 0.01 | 0.02 | 0.14 | 0.0 | 0.00 | 0.0 | 0.00 | 1 | ▇▁▁▁▁ |
| TBI_7I | 11874 | 0.00 | 1.00 | 0.00 | 1.0 | 1.00 | 1.0 | 1.00 | 1 | ▁▁▇▁▁ |
| TBI_7K | 11874 | 0.00 | 4.00 | 5.66 | 0.0 | 2.00 | 4.0 | 6.00 | 8 | ▇▁▁▁▇ |
| TBI_7L | 11874 | 0.00 | 4.00 | 5.66 | 0.0 | 2.00 | 4.0 | 6.00 | 8 | ▇▁▁▁▇ |
| TBI_8G | 11874 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.0 | 0.00 | 0 | ▁▁▇▁▁ |
# Same summary for the 2-year follow-up event
# (the rendered output below reports 0 rows for this event).
brainTruma %>%
  filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(-(1:8)) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 0 |
| Number of columns | 39 |
| _______________________ | |
| Column type frequency: | |
| logical | 3 |
| numeric | 36 |
| ________________________ | |
| Group variables | None |
Variable type: logical
| skim_variable | n_missing | complete_rate | mean | count |
|---|---|---|---|---|
| TBI_8I | 0 | NaN | NaN | : |
| TBI_8K | 0 | NaN | NaN | : |
| TBI_8L | 0 | NaN | NaN | : |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| TBI_SELECT_LANGUAGE___1 | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_1 | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_1B | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_1C | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_1D | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_2 | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_2B | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_2C | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_2D | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_3 | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_3B | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_3C | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_3D | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_4 | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_4B | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_4C | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_4D | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_5 | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_5B | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_5C | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_5D | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_6O | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_6P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_6Q | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_6R | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_6S | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_7A | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_7C1 | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBL_7C2 | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_7E | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_7F | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_7G | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_7I | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_7K | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_7L | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| TBI_8G | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA |
Reference paper: https://www.nature.com/articles/s41562-019-0609-3

Reference analysis code: https://github.com/anthonystevendick/bilingual_abcd/blob/master/bilingual_analysis.r
accult_q1_y How well do you speak English?
1 = Poor; 2 = Fair; 3 = Good; 4 = Excellent accult_q2_y Besides English,
do you speak or understand another language or dialect? If child asks
about languages learned in school, the RA should state: That’s OK, as
long as it is a language or dialect that you speak or understand.
1 = Poor Mal; 2 = Fair Regular; 3 = Good Bien; 4 = Excellent Excelente;
777 = Refused Niego contestar; 999 = Don’t Know No se
accult_q4_y What language do you speak with most of your friends?
1 = (Other language) all the time; 2 = (Other language) most of the
time; 3 = (Other language) and English equally; 4 = English most of the
time; 5 = English all the time
accult_q5_y What language do you speak with most of your family? 1 =
(Other language) all the time; 2 = (Other language) most of the time; 3
= (Other language) and English equally; 4 = English most of the time; 5
= English all the time
# Load the YACC01 table (the ACCULT_* items used below) as a tibble.
bilingual <- paste0(dataFold, "YACC01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()
#%>% filter(EVENTNAME =="baseline_year_1_arm_1")
#bilingual_status
# #recode the accult_q2_y variable into a binary "Bilingual Status", 0 = not bilingual; 1 = bilingual
#
# bilingual_status <- biLingual$ACCULT_Q2_Y
# sum(is.na(bilingual_status))
#bilingual_degree
# #dimension a 'bilingual degree' variable, where 1 = participant said they were bilingual, and they speak the other language with friends all the time, most of the time,
# #or equally, OR they speak the other language with family all the time, most of the time, or equally.
#
# bilingual_degree <- ifelse(bilingual_status == 0, 0, ifelse(bilingual_status == 1 & (as.numeric(accult_q4_y) <= 3 | as.numeric(accult_q5_y) <= 3), 1, NA))
# count(bilingual_degree) #check the data
# sum(is.na(bilingual_degree))
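#### here I change it such that 0 = non-bilingual, 1 = bilingual who uses the other language at least as much as English with friends or family (ACCULT_Q4_Y <= 3 or ACCULT_Q5_Y <= 3), 2 = bilingual who uses mostly English with both (the remaining bilinguals)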
#bilingual_use
#
# #dimension a continuous 'bilingual use' variable, and reverse-score so that if participants speak the other language with friends all the time, most of the time...,
# #they will receive high scores on this measure (range 0-8, with 8 indicating a high-degree of other language use)
#
# bilingual_use<-10-(as.numeric(abcd_subset$accult_q4_y)+as.numeric(abcd_subset$accult_q5_y))
# sum(is.na(bilingual_use))
#### Bilingual measures derived from the ACCULT_* items, as coded below:
####   bilingual_status: ACCULT_Q2_Y as a factor; the 777 (refused) and
####     999 (don't know) codes are set to NA.
####   bilingual_degree: 0 = non-bilingual;
####     1 = bilingual with ACCULT_Q4_Y <= 3 or ACCULT_Q5_Y <= 3, i.e. the other
####         language is used at least as much as English with friends or family;
####     2 = remaining bilinguals (mostly English with friends and family).
####   bilingual_use: 0 for non-bilinguals, otherwise
####     11 - (ACCULT_Q4_Y + ACCULT_Q5_Y), so higher = more other-language use.
bilingualAdded <- bilingual %>%
  mutate(
    # %in% treats both sentinel codes alike and leaves genuine NA as NA
    bilingual_status = factor(
      ifelse(ACCULT_Q2_Y %in% c(777, 999), NA, ACCULT_Q2_Y)
    ),
    # nested ifelse() kept on purpose: an NA test yields NA instead of falling
    # through to the next branch
    bilingual_degree = factor(
      ifelse(
        bilingual_status == 0, 0,
        ifelse(
          bilingual_status == 1 &
            (as.numeric(ACCULT_Q4_Y) <= 3 | as.numeric(ACCULT_Q5_Y) <= 3), 1,
          ifelse(
            bilingual_status == 1 &
              (as.numeric(ACCULT_Q4_Y) > 3 | as.numeric(ACCULT_Q5_Y) > 3), 2,
            NA
          )
        )
      )
    ),
    bilingual_use = ifelse(
      bilingual_status == 0, 0,
      11 - (as.numeric(ACCULT_Q4_Y) + as.numeric(ACCULT_Q5_Y))
    )
  )
# Keep the identifiers and every column whose name starts with "bilingual_".
bilingualSum <- dplyr::select(
  bilingualAdded,
  SUBJECTKEY, EVENTNAME, starts_with("bilingual_")
)
# Baseline summary of the bilingual measures (identifier columns dropped).
bilingualSum %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 11876 |
| Number of columns | 3 |
| _______________________ | |
| Column type frequency: | |
| factor | 2 |
| numeric | 1 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| bilingual_status | 82 | 0.99 | FALSE | 2 | 0: 7357, 1: 4437 |
| bilingual_degree | 82 | 0.99 | FALSE | 3 | 0: 7357, 2: 2753, 1: 1684 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| bilingual_use | 82 | 0.99 | 1.02 | 1.7 | 0 | 0 | 0 | 1 | 9 | ▇▂▁▁▁ |
# Same summary for the 2-year follow-up event.
bilingualSum %>%
  filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 10414 |
| Number of columns | 3 |
| _______________________ | |
| Column type frequency: | |
| factor | 2 |
| numeric | 1 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| bilingual_status | 72 | 0.99 | FALSE | 2 | 0: 6246, 1: 4096 |
| bilingual_degree | 72 | 0.99 | FALSE | 3 | 0: 6246, 2: 2726, 1: 1370 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| bilingual_use | 72 | 0.99 | 1.01 | 1.65 | 0 | 0 | 0 | 1 | 9 | ▇▂▁▁▁ |
From Zhang et al., Translational Psychiatry (2020), https://doi.org/10.1038/s41398-020-0761-6: The “Safety from Crime” items from the PhenX Toolkit were used to assess neighborhood safety and crime reports. Additionally, children reported their school risk and protective factors via a 12-item Inventory for School Risk and Protective Factors of the PhenX Toolkit.
from parents and children
# Parent-reported neighbourhood safety: row-wise sum of every column whose name
# starts with "NEIGHBORHOOD". With na.rm = FALSE a row with any missing item
# gets an NA sum.
NeighboSafety_parent <- as_tibble(read.csv(paste0(dataFold, "ABCD_PNSC01_DATA_TABLE.csv"))) %>%
  mutate(
    # FALSE is spelled out: the shorthand F is an ordinary, reassignable variable
    neighbo_safety_parent_sum =
      rowSums(dplyr::select(., starts_with("NEIGHBORHOOD")), na.rm = FALSE)
  ) %>%
  dplyr::select(SUBJECTKEY, EVENTNAME, neighbo_safety_parent_sum)
# I feel safe walking in my neighborhood, day or night. Me siento seguro(a) caminando por mi vecindario, de día o de noche.
# Violence is not a problem in my neighborhood./ La violencia no es un problema en mi vecindario.
# My neighborhood is safe from crime. Mi vecindario está a salvo de la delincuencia.
#1 = Strongly Disagree /Muy en desacuerdo; 2 = Disagree /En desacuerdo; 3 = Neutral (neither agree nor disagree)/ Neutral (ni de acuerdo ni en desacuerdo); 4 = Agree /De acuerdo; 5 = Strongly Agree/ Muy de acuerdo//The following questions are about your neighborhood. Your neighborhood is the area within about a 20-minute walk (or about a mile) from your home. For each of the statements please indicate whether you strongly agree, agree, neither agree nor disagree, disagree, or strongly disagree
# Child-reported neighbourhood safety: the single item NEIGHBORHOOD_CRIME_Y,
# renamed to neighbo_safety_child_sum while selecting.
NeighboSafety_children <- paste0(dataFold, "ABCD_NSC01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble() %>%
  dplyr::select(
    SUBJECTKEY,
    EVENTNAME,
    neighbo_safety_child_sum = NEIGHBORHOOD_CRIME_Y
  )
#My neighborhood is safe from crime.
# Full join of the parent and child tables on participant and event.
NeighboSafety <- plyr::join_all(
  list(NeighboSafety_parent, NeighboSafety_children),
  by = c("SUBJECTKEY", "EVENTNAME"),
  type = "full"
)
# Baseline summary of the neighbourhood-safety scores (all columns kept).
NeighboSafety %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 11876 |
| Number of columns | 4 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| numeric | 2 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| SUBJECTKEY | 0 | 1 | 12 | 16 | 0 | 11876 | 0 |
| EVENTNAME | 0 | 1 | 21 | 21 | 0 | 1 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| neighbo_safety_parent_sum | 47 | 1 | 11.67 | 2.93 | 3 | 10 | 12 | 14 | 15 | ▁▁▃▆▇ |
| neighbo_safety_child_sum | 24 | 1 | 4.03 | 1.09 | 1 | 3 | 4 | 5 | 5 | ▁▁▃▆▇ |
# Same summary for the 2-year follow-up event.
NeighboSafety %>%
  filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 10414 |
| Number of columns | 4 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| numeric | 2 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| SUBJECTKEY | 0 | 1 | 12 | 16 | 0 | 10414 | 0 |
| EVENTNAME | 0 | 1 | 24 | 24 | 0 | 1 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| neighbo_safety_parent_sum | 120 | 0.99 | 11.62 | 2.81 | 3 | 10 | 12 | 14 | 15 | ▁▁▅▆▇ |
| neighbo_safety_child_sum | 32 | 1.00 | 4.09 | 1.00 | 1 | 4 | 4 | 5 | 5 | ▁▁▃▆▇ |
# Load the SRPF01 table and rename VISIT to EVENTNAME, the event column name
# used by the other tables in this script.
SchRisk <- paste0(dataFold, "SRPF01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble() %>%
  rename(EVENTNAME = VISIT)
# Three school scores, each a row-wise sum of SCHOOL_<n>_Y items
# (rowSums() without na.rm: any missing item makes the sum NA):
#   environment   = items 2-7
#   involvement   = items 8, 9, 10, 12
#   disengagement = items 15, 17
school_risk_sum <- SchRisk %>%
  mutate(
    sumSchool_environment = rowSums(
      dplyr::select(., all_of(paste0("SCHOOL_", 2:7, "_Y")))
    ),
    sumSchool_involvement = rowSums(
      dplyr::select(., all_of(paste0("SCHOOL_", c(8, 9, 10, 12), "_Y")))
    ),
    sumSchool_disengagement = rowSums(
      dplyr::select(., all_of(paste0("SCHOOL_", c(15, 17), "_Y")))
    )
  ) %>%
  dplyr::select(SUBJECTKEY, EVENTNAME, starts_with("sumSchool"))
# Baseline summary of the school scores (all columns kept).
school_risk_sum %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 11876 |
| Number of columns | 5 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| numeric | 3 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| SUBJECTKEY | 0 | 1 | 12 | 16 | 0 | 11876 | 0 |
| EVENTNAME | 0 | 1 | 21 | 21 | 0 | 1 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| sumSchool_environment | 27 | 1 | 19.93 | 2.83 | 6 | 18 | 20 | 22 | 24 | ▁▁▂▇▇ |
| sumSchool_involvement | 26 | 1 | 13.06 | 2.37 | 4 | 12 | 13 | 15 | 16 | ▁▁▃▅▇ |
| sumSchool_disengagement | 25 | 1 | 3.74 | 1.46 | 2 | 3 | 4 | 5 | 8 | ▇▃▃▁▁ |
# Same summary for the 2-year follow-up event.
school_risk_sum %>%
  filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 10414 |
| Number of columns | 5 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| numeric | 3 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| SUBJECTKEY | 0 | 1 | 12 | 16 | 0 | 10414 | 0 |
| EVENTNAME | 0 | 1 | 24 | 24 | 0 | 1 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| sumSchool_environment | 35 | 1 | 19.64 | 2.78 | 6 | 18 | 20 | 22 | 24 | ▁▁▂▇▇ |
| sumSchool_involvement | 35 | 1 | 12.67 | 2.33 | 4 | 11 | 13 | 14 | 16 | ▁▁▅▇▇ |
| sumSchool_disengagement | 35 | 1 | 3.99 | 1.35 | 2 | 3 | 4 | 5 | 8 | ▇▆▅▂▁ |
Multiply hours per session x days per week x 4 weeks x months x years, then divide by 24 to express the total in days. This method produces many zeros, possibly because 999 ("Don't know") is recoded to 0.
# Load the ABCD_SPACSS01 table (the SAI_SS_* items used below) as a tibble.
sport_act <- paste0(dataFold, "ABCD_SPACSS01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()
sport_act_multiplied_sum <- sport_act %>%
# change 999 to 0. don't know seems to infer that the child doesn't do that activiy
mutate_at(vars(starts_with("SAI_SS_")), ~ replace(., which(.==999), 0)) %>%
# 0 = 0; 1 = 1; 2 = 2; 3 = 3; 4 = 4; 5 = 5; 6 = 6; 7 = 7; 8 = Once every 2 weeks; 9 = One day every month; 10 = Less than one day per month/; 999 = Don't know | When ballet/dance was not endorsed, values for the follow-up questions are missing. Here, missing values for the "... how many...?" follow-up questions have been replaced with "0".
#change this so that 0 = nothing, .125 = Less than one day per month, .25 = One day every month, .5 =Once every 2 weeks, 1 = 1 day per week and so on
mutate_at(.vars = vars(ends_with("_PERWK_P")),
.funs = funs(case_when(. == 10 ~ .125,
. == 9 ~ .25,
. == 8 ~ .5,
TRUE ~ as.numeric(.)))) %>%
# mutate_at(.vars = vars(ends_with("_PERWK_P")),
# .funs = funs(case_when(. == 10 ~ 1,
# . == 9 ~ 2,
# . == 8 ~ 3,
# . == 1 ~ 4,
# . == 2 ~ 5,
# . == 3 ~ 6,
# . == 4 ~ 7,
# . == 5 ~ 8,
# . == 6 ~ 9,
# . == 7 ~ 10,
# TRUE ~ as.numeric(.)))) %>%
#0 = 0; 1 = less than 30 minutes; 2 = 30; 3 = 45; 4 = 60 (1 hr); 5 = 90 (1.5 hrs); 6 = 120 (2 hrs); 7 = 150 (2.5 hrs); 8 = 180 (3 hrs); 9 = greater than 3 hours; 999 = Don't know | When ballet/dance was not endorsed, values for the follow-up questions are missing. Here, missing values for the "... how many...?" follow-up questions have been replaced with "0".
#change to hours unit. assumes less than 30 minutes to be .25 hour(15 mins) and > 3 hrs to be 4 hours
mutate_at(.vars = vars(ends_with("_TSPENT_P")),
.funs = funs(case_when(. == 1 ~ .25,
. == 2 ~ .5,
. == 3 ~ .75,
. == 4 ~ 1,
. == 5 ~ 1.5,
. == 6 ~ 2,
. == 7 ~ 2.5,
. == 8 ~ 3,
. == 9 ~ 4,
TRUE ~ as.numeric(.)))) %>%
#hours x days x 4 weeks x months x years /24 to get days
mutate(dance_days = SAI_SS_DANCE_TSPENT_P*SAI_SS_DANCE_PERWK_P*4*SAI_SS_DANCE_NMONTH_P*SAI_SS_DANCE_NYR_P/24) %>%
mutate(base_days = SAI_SS_BASE_TSPENT_P*SAI_SS_BASE_PERWK_P*4*SAI_SS_BASE_NMONTH_P*SAI_SS_BASE_NYR_P/24) %>%
mutate(basket_days = SAI_SS_BASKET_TSPENT_P*SAI_SS_BASKET_PERWK_P*4*SAI_SS_BASKET_NMONTH_P*SAI_SS_BASKET_NYR_P/24) %>%
mutate(climb_days = SAI_SS_CLIMB_TSPENT_P*SAI_SS_CLIMB_PERWK_P*4*SAI_SS_CLIMB_NMONTH_P*SAI_SS_CLIMB_NYR_P/24) %>%
mutate(fball_days = SAI_SS_FBALL_TSPENT_P*SAI_SS_FBALL_PERWK_P*4*SAI_SS_FBALL_NMONTH_P*SAI_SS_FBALL_NYR_P/24) %>%
mutate(fhock_days = SAI_SS_FHOCK_TSPENT_P*SAI_SS_FHOCK_PERWK_P*4*SAI_SS_FHOCK_NMONTH_P*SAI_SS_FHOCK_NYR_P/24) %>%
mutate(gym_days = SAI_SS_GYM_TSPENT_P*SAI_SS_GYM_PERWK_P*4*SAI_SS_GYM_NMONTH_P*SAI_SS_GYM_NYR_P/24) %>%
mutate(ihock_days = SAI_SS_IHOCK_TSPENT_P*SAI_SS_IHOCK_PERWK_P*4*SAI_SS_IHOCK_NMONTH_P*SAI_SS_IHOCK_NYR_P/24) %>%
mutate(polo_days = SAI_SS_POLO_TSPENT_P*SAI_SS_POLO_PERWK_P*4*SAI_SS_POLO_NMONTH_P*SAI_SS_POLO_NYR_P/24) %>%
mutate(iskate_days = SAI_SS_ISKATE_TSPENT_P*SAI_SS_ISKATE_PERWK_P*4*SAI_SS_ISKATE_NMONTH_P*SAI_SS_ISKATE_NYR_P/24) %>%
mutate(m_arts_days = SAI_SS_M_ARTS_TSPENT_P*SAI_SS_M_ARTS_PERWK_P*4*SAI_SS_M_ARTS_NMONTH_P*SAI_SS_M_ARTS_NYR_P/24) %>%
mutate(lax_days = SAI_SS_LAX_TSPENT_P*SAI_SS_LAX_PERWK_P*4*SAI_SS_LAX_NMONTH_P*SAI_SS_LAX_NYR_P/24) %>%
mutate(rugby_days = SAI_SS_RUGBY_TSPENT_P*SAI_SS_RUGBY_PERWK_P*4*SAI_SS_RUGBY_NMONTH_P*SAI_SS_RUGBY_NYR_P/24) %>%
mutate(skate_days = SAI_SS_SKATE_TSPENT_P*SAI_SS_SKATE_PERWK_P*4*SAI_SS_SKATE_NMONTH_P*SAI_SS_SKATE_NYR_P/24) %>%
mutate(sboard_days = SAI_SS_SBOARD_TSPENT_P*SAI_SS_SBOARD_PERWK_P*4*SAI_SS_SBOARD_NMONTH_P*SAI_SS_SBOARD_NYR_P/24) %>%
mutate(soc_days = SAI_SS_SOC_TSPENT_P*SAI_SS_SOC_PERWK_P*4*SAI_SS_SOC_NMONTH_P*SAI_SS_SOC_NYR_P/24) %>%
mutate(surf_days = SAI_SS_SURF_TSPENT_P*SAI_SS_SURF_PERWK_P*4*SAI_SS_SURF_NMONTH_P*SAI_SS_SURF_NYR_P/24) %>%
mutate(wpolo_days = SAI_SS_WPOLO_TSPENT_P*SAI_SS_WPOLO_PERWK_P*4*SAI_SS_WPOLO_NMONTH_P*SAI_SS_WPOLO_NYR_P/24) %>%
mutate(tennis_days = SAI_SS_TENNIS_TSPENT_P*SAI_SS_TENNIS_PERWK_P*4*SAI_SS_TENNIS_NMONTH_P*SAI_SS_TENNIS_NYR_P/24) %>%
mutate(run_days = SAI_SS_RUN_TSPENT_P*SAI_SS_RUN_PERWK_P*4*SAI_SS_RUN_NMONTH_P*SAI_SS_RUN_NYR_P/24) %>%
mutate(mma_days = SAI_SS_MMA_TSPENT_P*SAI_SS_MMA_PERWK_P*4*SAI_SS_MMA_NMONTH_P*SAI_SS_MMA_NYR_P/24) %>%
mutate(vball_days = SAI_SS_VBALL_TSPENT_P*SAI_SS_VBALL_PERWK_P*4*SAI_SS_VBALL_NMONTH_P*SAI_SS_VBALL_NYR_P/24) %>%
mutate(yoga_days = SAI_SS_YOGA_TSPENT_P*SAI_SS_YOGA_PERWK_P*4*SAI_SS_YOGA_NMONTH_P*SAI_SS_YOGA_NYR_P/24) %>%
mutate(music_days = SAI_SS_MUSIC_TSPENT_P*SAI_SS_MUSIC_PERWK_P*4*SAI_SS_MUSIC_NMONTH_P*SAI_SS_MUSIC_NYR_P/24) %>%
mutate(art_days = SAI_SS_ART_TSPENT_P*SAI_SS_ART_PERWK_P*4*SAI_SS_ART_NMONTH_P*SAI_SS_ART_NYR_P/24) %>%
mutate(drama_days = SAI_SS_DRAMA_TSPENT_P*SAI_SS_DRAMA_PERWK_P*4*SAI_SS_DRAMA_NMONTH_P*SAI_SS_DRAMA_NYR_P/24) %>%
mutate(craft_days = SAI_SS_CRAFTS_TSPENT_P*SAI_SS_CRAFTS_PERWK_P*4*SAI_SS_CRAFTS_NMONTH_P*SAI_SS_CRAFTS_NYR_P/24) %>%
mutate(chess_days = SAI_SS_CHESS_TSPENT_P*SAI_SS_CHESS_PERWK_P*4*SAI_SS_CHESS_NMONTH_P*SAI_SS_CHESS_NYR_P/24) %>%
mutate(collect_days = SAI_SS_COLLECT_TSPENT_P*SAI_SS_COLLECT_PERWK_P*4*SAI_SS_COLLECT_NMONTH_P*SAI_SS_COLLECT_NYR_P/24) %>%
# didn't include listening to music or reading since they are in the different scale
# summary based on kerlic's child dev paper
mutate(phys_ind_days_sum = sboard_days + climb_days + gym_days + iskate_days + m_arts_days + skate_days + dance_days + surf_days + tennis_days + run_days + mma_days + yoga_days) %>%
mutate(phys_team_days_sum = base_days + basket_days + fhock_days + fball_days + ihock_days + polo_days + lax_days + rugby_days + soc_days + wpolo_days +vball_days) %>%
mutate(art_days_sum = collect_days + music_days + art_days + drama_days + craft_days + chess_days) %>%
mutate(sport_act_all_days_sum = phys_ind_days_sum + phys_team_days_sum + art_days_sum) %>%
mutate(phys_ind_daypweek_sum = SAI_SS_SBOARD_PERWK_P + SAI_SS_CLIMB_PERWK_P + SAI_SS_GYM_PERWK_P + SAI_SS_ISKATE_PERWK_P + SAI_SS_M_ARTS_PERWK_P + SAI_SS_SKATE_PERWK_P + SAI_SS_DANCE_PERWK_P + SAI_SS_SURF_PERWK_P + SAI_SS_TENNIS_PERWK_P + SAI_SS_RUN_PERWK_P + SAI_SS_MMA_PERWK_P + SAI_SS_YOGA_PERWK_P) %>%
mutate(phys_team_daypweek_sum = SAI_SS_BASE_PERWK_P + SAI_SS_BASKET_PERWK_P + SAI_SS_FHOCK_PERWK_P + SAI_SS_FBALL_PERWK_P + SAI_SS_IHOCK_PERWK_P + SAI_SS_POLO_PERWK_P + SAI_SS_LAX_PERWK_P + SAI_SS_RUGBY_PERWK_P + SAI_SS_SOC_PERWK_P + SAI_SS_WPOLO_PERWK_P +SAI_SS_VBALL_PERWK_P) %>%
mutate(art_daypweek_sum = SAI_SS_COLLECT_PERWK_P + SAI_SS_MUSIC_PERWK_P + SAI_SS_ART_PERWK_P + SAI_SS_DRAMA_PERWK_P + SAI_SS_CRAFTS_PERWK_P + SAI_SS_CHESS_PERWK_P) %>%
mutate(sport_act_all_daypweek_sum = phys_ind_daypweek_sum + phys_team_daypweek_sum + art_daypweek_sum)
# Descriptive summary of every column, restricted to the baseline visit.
sport_act_multiplied_sum %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 11876 |
| Number of columns | 167 |
| _______________________ | |
| Column type frequency: | |
| character | 5 |
| numeric | 162 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| SUBJECTKEY | 0 | 1 | 12 | 16 | 0 | 11876 | 0 |
| SRC_SUBJECT_ID | 0 | 1 | 16 | 16 | 0 | 11876 | 0 |
| INTERVIEW_DATE | 0 | 1 | 9 | 9 | 0 | 756 | 0 |
| SEX | 0 | 1 | 1 | 1 | 0 | 2 | 0 |
| EVENTNAME | 0 | 1 | 21 | 21 | 0 | 1 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| ABCD_SPACSS01_ID | 0 | 1.00 | 34214.50 | 3428.45 | 28277 | 31245.75 | 34214.50 | 37183.25 | 40152.00 | ▇▇▇▇▇ |
| DATASET_ID | 0 | 1.00 | 47120.00 | 0.00 | 47120 | 47120.00 | 47120.00 | 47120.00 | 47120.00 | ▁▁▇▁▁ |
| INTERVIEW_AGE | 0 | 1.00 | 118.98 | 7.50 | 107 | 112.00 | 119.00 | 126.00 | 133.00 | ▇▆▆▆▆ |
| SAI_SS_DANCE_NYR_P | 24 | 1.00 | 0.64 | 1.44 | 0 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
| SAI_SS_DANCE_NMONTH_P | 26 | 1.00 | 1.93 | 3.68 | 0 | 0.00 | 0.00 | 0.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_DANCE_PERWK_P | 20 | 1.00 | 0.42 | 0.89 | 0 | 0.00 | 0.00 | 0.25 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_DANCE_TSPENT_P | 30 | 1.00 | 0.26 | 0.51 | 0 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▂▁▁▁ |
| SAI_SS_BASE_NYR_P | 17 | 1.00 | 0.74 | 1.50 | 0 | 0.00 | 0.00 | 1.00 | 10.00 | ▇▁▁▁▁ |
| SAI_SS_BASE_NMONTH_P | 23 | 1.00 | 1.14 | 2.16 | 0 | 0.00 | 0.00 | 2.00 | 12.00 | ▇▂▁▁▁ |
| SAI_SS_BASE_PERWK_P | 23 | 1.00 | 0.71 | 1.26 | 0 | 0.00 | 0.00 | 2.00 | 7.00 | ▇▁▂▁▁ |
| SAI_SS_BASE_TSPENT_P | 27 | 1.00 | 0.37 | 0.66 | 0 | 0.00 | 0.00 | 1.00 | 4.00 | ▇▂▁▁▁ |
| SAI_SS_BASKET_NYR_P | 21 | 1.00 | 0.59 | 1.23 | 0 | 0.00 | 0.00 | 1.00 | 10.00 | ▇▁▁▁▁ |
| SAI_SS_BASKET_NMONTH_P | 25 | 1.00 | 1.07 | 2.14 | 0 | 0.00 | 0.00 | 1.00 | 12.00 | ▇▂▁▁▁ |
| SAI_SS_BASKET_PERWK_P | 26 | 1.00 | 0.62 | 1.17 | 0 | 0.00 | 0.00 | 1.00 | 7.00 | ▇▂▁▁▁ |
| SAI_SS_BASKET_TSPENT_P | 50 | 1.00 | 0.29 | 0.54 | 0 | 0.00 | 0.00 | 0.50 | 4.00 | ▇▂▁▁▁ |
| SAI_SS_CLIMB_NYR_P | 15 | 1.00 | 0.08 | 0.53 | 0 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
| SAI_SS_CLIMB_NMONTH_P | 14 | 1.00 | 0.16 | 1.09 | 0 | 0.00 | 0.00 | 0.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_CLIMB_PERWK_P | 19 | 1.00 | 0.06 | 0.40 | 0 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_CLIMB_TSPENT_P | 19 | 1.00 | 0.04 | 0.24 | 0 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
| SAI_SS_FHOCK_NYR_P | 10 | 1.00 | 0.01 | 0.18 | 0 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_FHOCK_NMONTH_P | 11 | 1.00 | 0.02 | 0.34 | 0 | 0.00 | 0.00 | 0.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_FHOCK_PERWK_P | 10 | 1.00 | 0.01 | 0.18 | 0 | 0.00 | 0.00 | 0.00 | 5.00 | ▇▁▁▁▁ |
| SAI_SS_FHOCK_TSPENT_P | 10 | 1.00 | 0.01 | 0.10 | 0 | 0.00 | 0.00 | 0.00 | 3.00 | ▇▁▁▁▁ |
| SAI_SS_FBALL_NYR_P | 18 | 1.00 | 0.25 | 0.86 | 0 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
| SAI_SS_FBALL_NMONTH_P | 21 | 1.00 | 0.44 | 1.42 | 0 | 0.00 | 0.00 | 0.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_FBALL_PERWK_P | 20 | 1.00 | 0.35 | 1.10 | 0 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_FBALL_TSPENT_P | 24 | 1.00 | 0.17 | 0.51 | 0 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
| SAI_SS_GYM_NYR_P | 15 | 1.00 | 0.49 | 1.16 | 0 | 0.00 | 0.00 | 0.00 | 9.00 | ▇▁▁▁▁ |
| SAI_SS_GYM_NMONTH_P | 19 | 1.00 | 1.71 | 3.59 | 0 | 0.00 | 0.00 | 0.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_GYM_PERWK_P | 21 | 1.00 | 0.33 | 0.77 | 0 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_GYM_TSPENT_P | 31 | 1.00 | 0.26 | 0.56 | 0 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▂▁▁▁ |
| SAI_SS_IHOCK_NYR_P | 10 | 1.00 | 0.08 | 0.56 | 0 | 0.00 | 0.00 | 0.00 | 9.00 | ▇▁▁▁▁ |
| SAI_SS_IHOCK_NMONTH_P | 13 | 1.00 | 0.14 | 1.00 | 0 | 0.00 | 0.00 | 0.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_IHOCK_PERWK_P | 10 | 1.00 | 0.07 | 0.48 | 0 | 0.00 | 0.00 | 0.00 | 6.00 | ▇▁▁▁▁ |
| SAI_SS_IHOCK_TSPENT_P | 11 | 1.00 | 0.03 | 0.20 | 0 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
| SAI_SS_POLO_NYR_P | 11 | 1.00 | 0.08 | 0.52 | 0 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
| SAI_SS_POLO_NMONTH_P | 14 | 1.00 | 0.22 | 1.38 | 0 | 0.00 | 0.00 | 0.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_POLO_PERWK_P | 14 | 1.00 | 0.05 | 0.36 | 0 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_POLO_TSPENT_P | 17 | 1.00 | 0.04 | 0.25 | 0 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
| SAI_SS_ISKATE_NYR_P | 11 | 1.00 | 0.13 | 0.64 | 0 | 0.00 | 0.00 | 0.00 | 9.00 | ▇▁▁▁▁ |
| SAI_SS_ISKATE_NMONTH_P | 16 | 1.00 | 0.27 | 1.38 | 0 | 0.00 | 0.00 | 0.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_ISKATE_PERWK_P | 18 | 1.00 | 0.08 | 0.40 | 0 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_ISKATE_TSPENT_P | 19 | 1.00 | 0.06 | 0.26 | 0 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
| SAI_SS_M_ARTS_NYR_P | 16 | 1.00 | 0.36 | 0.97 | 0 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
| SAI_SS_M_ARTS_NMONTH_P | 23 | 1.00 | 1.49 | 3.63 | 0 | 0.00 | 0.00 | 0.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_M_ARTS_PERWK_P | 19 | 1.00 | 0.36 | 0.91 | 0 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_M_ARTS_TSPENT_P | 25 | 1.00 | 0.17 | 0.40 | 0 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
| SAI_SS_LAX_NYR_P | 10 | 1.00 | 0.06 | 0.41 | 0 | 0.00 | 0.00 | 0.00 | 8.00 | ▇▁▁▁▁ |
| SAI_SS_LAX_NMONTH_P | 12 | 1.00 | 0.11 | 0.76 | 0 | 0.00 | 0.00 | 0.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_LAX_PERWK_P | 15 | 1.00 | 0.07 | 0.42 | 0 | 0.00 | 0.00 | 0.00 | 5.00 | ▇▁▁▁▁ |
| SAI_SS_LAX_TSPENT_P | 13 | 1.00 | 0.04 | 0.23 | 0 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
| SAI_SS_RUGBY_NYR_P | 10 | 1.00 | 0.01 | 0.14 | 0 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_RUGBY_NMONTH_P | 10 | 1.00 | 0.01 | 0.28 | 0 | 0.00 | 0.00 | 0.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_RUGBY_PERWK_P | 10 | 1.00 | 0.01 | 0.15 | 0 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_RUGBY_TSPENT_P | 10 | 1.00 | 0.00 | 0.09 | 0 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
| SAI_SS_SKATE_NYR_P | 13 | 1.00 | 0.09 | 0.56 | 0 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
| SAI_SS_SKATE_NMONTH_P | 17 | 1.00 | 0.22 | 1.33 | 0 | 0.00 | 0.00 | 0.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_SKATE_PERWK_P | 14 | 1.00 | 0.10 | 0.62 | 0 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_SKATE_TSPENT_P | 18 | 1.00 | 0.03 | 0.20 | 0 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
| SAI_SS_SBOARD_NYR_P | 10 | 1.00 | 0.33 | 1.25 | 0 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
| SAI_SS_SBOARD_NMONTH_P | 20 | 1.00 | 0.27 | 0.99 | 0 | 0.00 | 0.00 | 0.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_SBOARD_PERWK_P | 12 | 1.00 | 0.10 | 0.42 | 0 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_SBOARD_TSPENT_P | 16 | 1.00 | 0.25 | 0.90 | 0 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
| SAI_SS_SOC_NYR_P | 31 | 1.00 | 1.21 | 1.85 | 0 | 0.00 | 0.00 | 2.00 | 10.00 | ▇▁▁▁▁ |
| SAI_SS_SOC_NMONTH_P | 44 | 1.00 | 2.04 | 3.05 | 0 | 0.00 | 0.00 | 4.00 | 12.00 | ▇▂▁▁▁ |
| SAI_SS_SOC_PERWK_P | 38 | 1.00 | 0.95 | 1.27 | 0 | 0.00 | 0.00 | 2.00 | 7.00 | ▇▂▂▁▁ |
| SAI_SS_SOC_TSPENT_P | 58 | 1.00 | 0.47 | 0.62 | 0 | 0.00 | 0.00 | 1.00 | 4.00 | ▇▅▁▁▁ |
| SAI_SS_SURF_NYR_P | 10 | 1.00 | 0.01 | 0.21 | 0 | 0.00 | 0.00 | 0.00 | 9.00 | ▇▁▁▁▁ |
| SAI_SS_SURF_NMONTH_P | 12 | 1.00 | 0.02 | 0.31 | 0 | 0.00 | 0.00 | 0.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_SURF_PERWK_P | 10 | 1.00 | 0.01 | 0.18 | 0 | 0.00 | 0.00 | 0.00 | 5.00 | ▇▁▁▁▁ |
| SAI_SS_SURF_TSPENT_P | 11 | 1.00 | 0.01 | 0.15 | 0 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
| SAI_SS_WPOLO_NYR_P | 22 | 1.00 | 1.16 | 2.14 | 0 | 0.00 | 0.00 | 2.00 | 10.00 | ▇▁▁▁▁ |
| SAI_SS_WPOLO_NMONTH_P | 42 | 1.00 | 1.76 | 3.24 | 0 | 0.00 | 0.00 | 3.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_WPOLO_PERWK_P | 35 | 1.00 | 0.78 | 1.43 | 0 | 0.00 | 0.00 | 1.00 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_WPOLO_TSPENT_P | 47 | 1.00 | 0.31 | 0.56 | 0 | 0.00 | 0.00 | 0.50 | 4.00 | ▇▂▁▁▁ |
| SAI_SS_TENNIS_NYR_P | 15 | 1.00 | 0.14 | 0.63 | 0 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
| SAI_SS_TENNIS_NMONTH_P | 17 | 1.00 | 0.31 | 1.46 | 0 | 0.00 | 0.00 | 0.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_TENNIS_PERWK_P | 19 | 1.00 | 0.12 | 0.54 | 0 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_TENNIS_TSPENT_P | 21 | 1.00 | 0.07 | 0.29 | 0 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
| SAI_SS_RUN_NYR_P | 18 | 1.00 | 0.14 | 0.62 | 0 | 0.00 | 0.00 | 0.00 | 9.00 | ▇▁▁▁▁ |
| SAI_SS_RUN_NMONTH_P | 20 | 1.00 | 0.29 | 1.27 | 0 | 0.00 | 0.00 | 0.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_RUN_PERWK_P | 24 | 1.00 | 0.16 | 0.68 | 0 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_RUN_TSPENT_P | 20 | 1.00 | 0.08 | 0.31 | 0 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
| SAI_SS_MMA_NYR_P | 16 | 1.00 | 0.06 | 0.45 | 0 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_MMA_NMONTH_P | 20 | 1.00 | 0.17 | 1.16 | 0 | 0.00 | 0.00 | 0.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_MMA_PERWK_P | 16 | 1.00 | 0.08 | 0.49 | 0 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_MMA_TSPENT_P | 20 | 1.00 | 0.04 | 0.24 | 0 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
| SAI_SS_VBALL_NYR_P | 13 | 1.00 | 0.04 | 0.27 | 0 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_VBALL_NMONTH_P | 15 | 1.00 | 0.09 | 0.66 | 0 | 0.00 | 0.00 | 0.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_VBALL_PERWK_P | 17 | 1.00 | 0.05 | 0.36 | 0 | 0.00 | 0.00 | 0.00 | 6.00 | ▇▁▁▁▁ |
| SAI_SS_VBALL_TSPENT_P | 14 | 1.00 | 0.03 | 0.20 | 0 | 0.00 | 0.00 | 0.00 | 3.00 | ▇▁▁▁▁ |
| SAI_SS_YOGA_NYR_P | 13 | 1.00 | 0.04 | 0.37 | 0 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
| SAI_SS_YOGA_NMONTH_P | 13 | 1.00 | 0.12 | 0.96 | 0 | 0.00 | 0.00 | 0.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_YOGA_PERWK_P | 14 | 1.00 | 0.03 | 0.30 | 0 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_YOGA_TSPENT_P | 16 | 1.00 | 0.02 | 0.12 | 0 | 0.00 | 0.00 | 0.00 | 3.00 | ▇▁▁▁▁ |
| SAI_SS_MUSIC_NYR_P | 25 | 1.00 | 0.90 | 1.48 | 0 | 0.00 | 0.00 | 1.00 | 10.00 | ▇▁▁▁▁ |
| SAI_SS_MUSIC_NMONTH_P | 39 | 1.00 | 3.38 | 4.65 | 0 | 0.00 | 0.00 | 8.00 | 12.00 | ▇▁▁▁▂ |
| SAI_SS_MUSIC_PERWK_P | 32 | 1.00 | 0.84 | 1.44 | 0 | 0.00 | 0.00 | 1.00 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_MUSIC_TSPENT_P | 42 | 1.00 | 0.29 | 0.42 | 0 | 0.00 | 0.00 | 0.50 | 4.00 | ▇▁▁▁▁ |
| SAI_SS_ART_NYR_P | 27 | 1.00 | 0.79 | 1.94 | 0 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
| SAI_SS_ART_NMONTH_P | 36 | 1.00 | 1.60 | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_ART_PERWK_P | 41 | 1.00 | 0.54 | 1.39 | 0 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_ART_TSPENT_P | 38 | 1.00 | 0.18 | 0.45 | 0 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
| SAI_SS_DRAMA_NYR_P | 14 | 1.00 | 0.23 | 0.83 | 0 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
| SAI_SS_DRAMA_NMONTH_P | 29 | 1.00 | 0.53 | 1.83 | 0 | 0.00 | 0.00 | 0.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_DRAMA_PERWK_P | 27 | 1.00 | 0.23 | 0.83 | 0 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_DRAMA_TSPENT_P | 24 | 1.00 | 0.16 | 0.54 | 0 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
| SAI_SS_CRAFTS_NYR_P | 21 | 1.00 | 0.28 | 1.12 | 0 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
| SAI_SS_CRAFTS_NMONTH_P | 28 | 1.00 | 0.62 | 2.40 | 0 | 0.00 | 0.00 | 0.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_CRAFTS_PERWK_P | 27 | 1.00 | 0.21 | 0.85 | 0 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_CRAFTS_TSPENT_P | 27 | 1.00 | 0.08 | 0.30 | 0 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
| SAI_SS_CHESS_NYR_P | 22 | 1.00 | 0.27 | 0.96 | 0 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
| SAI_SS_CHESS_NMONTH_P | 28 | 1.00 | 0.70 | 2.36 | 0 | 0.00 | 0.00 | 0.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_CHESS_PERWK_P | 34 | 1.00 | 0.17 | 0.66 | 0 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_CHESS_TSPENT_P | 33 | 1.00 | 0.10 | 0.32 | 0 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
| SAI_SS_COLLECT_NYR_P | 17 | 1.00 | 0.21 | 0.93 | 0 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
| SAI_SS_COLLECT_NMONTH_P | 28 | 1.00 | 0.53 | 2.32 | 0 | 0.00 | 0.00 | 0.00 | 12.00 | ▇▁▁▁▁ |
| SAI_SS_COLLECT_PERWK_P | 24 | 1.00 | 0.17 | 0.86 | 0 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| SAI_SS_COLLECT_TSPENT_P | 23 | 1.00 | 0.04 | 0.20 | 0 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
| SAI_SS_LMUSIC_YEARS_P | 1220 | 0.90 | 3.84 | 3.22 | 0 | 1.00 | 3.00 | 6.00 | 10.00 | ▇▃▂▂▃ |
| SAI_SS_LMUSIC_HOURS_P | 1697 | 0.86 | 4.64 | 7.58 | 0 | 1.00 | 3.00 | 6.00 | 168.00 | ▇▁▁▁▁ |
| SAI_SS_READ_YEARS_P | 591 | 0.95 | 2.72 | 2.38 | 0 | 0.00 | 3.00 | 4.00 | 10.00 | ▇▅▃▁▁ |
| SAI_SS_READ_HOURS_P | 831 | 0.93 | 4.62 | 9.22 | 0 | 0.00 | 3.00 | 6.00 | 168.00 | ▇▁▁▁▁ |
| SPORTS_ACTIVITY_SS_LMUSIC_P | 2069 | 0.83 | 169.79 | 369.57 | 1 | 3.00 | 6.00 | 9.00 | 999.00 | ▇▁▁▁▂ |
| SPORTS_ACTIVITY_SS_READ_P | 3309 | 0.72 | 91.02 | 278.59 | 1 | 4.00 | 6.00 | 8.00 | 999.00 | ▇▁▁▁▁ |
| dance_days | 54 | 1.00 | 2.28 | 12.64 | 0 | 0.00 | 0.00 | 0.00 | 560.00 | ▇▁▁▁▁ |
| base_days | 52 | 1.00 | 2.58 | 9.11 | 0 | 0.00 | 0.00 | 0.67 | 266.67 | ▇▁▁▁▁ |
| basket_days | 74 | 0.99 | 1.42 | 5.58 | 0 | 0.00 | 0.00 | 0.00 | 147.00 | ▇▁▁▁▁ |
| climb_days | 28 | 1.00 | 0.14 | 1.93 | 0 | 0.00 | 0.00 | 0.00 | 128.00 | ▇▁▁▁▁ |
| fball_days | 35 | 1.00 | 1.02 | 6.11 | 0 | 0.00 | 0.00 | 0.00 | 245.00 | ▇▁▁▁▁ |
| fhock_days | 11 | 1.00 | 0.02 | 0.67 | 0 | 0.00 | 0.00 | 0.00 | 55.00 | ▇▁▁▁▁ |
| gym_days | 54 | 1.00 | 1.99 | 12.01 | 0 | 0.00 | 0.00 | 0.00 | 392.00 | ▇▁▁▁▁ |
| ihock_days | 14 | 1.00 | 0.35 | 3.31 | 0 | 0.00 | 0.00 | 0.00 | 75.00 | ▇▁▁▁▁ |
| polo_days | 23 | 1.00 | 0.17 | 2.54 | 0 | 0.00 | 0.00 | 0.00 | 135.00 | ▇▁▁▁▁ |
| iskate_days | 28 | 1.00 | 0.21 | 2.14 | 0 | 0.00 | 0.00 | 0.00 | 108.00 | ▇▁▁▁▁ |
| m_arts_days | 48 | 1.00 | 1.35 | 5.69 | 0 | 0.00 | 0.00 | 0.00 | 175.00 | ▇▁▁▁▁ |
| lax_days | 18 | 1.00 | 0.16 | 1.68 | 0 | 0.00 | 0.00 | 0.00 | 60.00 | ▇▁▁▁▁ |
| rugby_days | 10 | 1.00 | 0.02 | 0.53 | 0 | 0.00 | 0.00 | 0.00 | 26.67 | ▇▁▁▁▁ |
| skate_days | 23 | 1.00 | 0.29 | 3.97 | 0 | 0.00 | 0.00 | 0.00 | 261.33 | ▇▁▁▁▁ |
| sboard_days | 28 | 1.00 | 0.85 | 4.93 | 0 | 0.00 | 0.00 | 0.00 | 128.00 | ▇▁▁▁▁ |
| soc_days | 107 | 0.99 | 3.84 | 10.13 | 0 | 0.00 | 0.00 | 3.00 | 196.00 | ▇▁▁▁▁ |
| surf_days | 13 | 1.00 | 0.03 | 0.87 | 0 | 0.00 | 0.00 | 0.00 | 48.00 | ▇▁▁▁▁ |
| wpolo_days | 90 | 0.99 | 2.97 | 10.38 | 0 | 0.00 | 0.00 | 1.25 | 256.00 | ▇▁▁▁▁ |
| tennis_days | 31 | 1.00 | 0.31 | 4.56 | 0 | 0.00 | 0.00 | 0.00 | 420.00 | ▇▁▁▁▁ |
| run_days | 36 | 1.00 | 0.30 | 2.29 | 0 | 0.00 | 0.00 | 0.00 | 96.00 | ▇▁▁▁▁ |
| mma_days | 25 | 1.00 | 0.28 | 3.06 | 0 | 0.00 | 0.00 | 0.00 | 175.00 | ▇▁▁▁▁ |
| vball_days | 20 | 1.00 | 0.07 | 0.73 | 0 | 0.00 | 0.00 | 0.00 | 44.00 | ▇▁▁▁▁ |
| yoga_days | 17 | 1.00 | 0.07 | 1.05 | 0 | 0.00 | 0.00 | 0.00 | 48.00 | ▇▁▁▁▁ |
| music_days | 82 | 0.99 | 2.34 | 7.09 | 0 | 0.00 | 0.00 | 1.67 | 224.00 | ▇▁▁▁▁ |
| art_days | 70 | 0.99 | 3.68 | 17.62 | 0 | 0.00 | 0.00 | 0.00 | 504.00 | ▇▁▁▁▁ |
| drama_days | 46 | 1.00 | 0.69 | 5.10 | 0 | 0.00 | 0.00 | 0.00 | 280.00 | ▇▁▁▁▁ |
| craft_days | 43 | 1.00 | 1.11 | 8.54 | 0 | 0.00 | 0.00 | 0.00 | 280.00 | ▇▁▁▁▁ |
| chess_days | 59 | 1.00 | 0.58 | 4.60 | 0 | 0.00 | 0.00 | 0.00 | 200.00 | ▇▁▁▁▁ |
| collect_days | 39 | 1.00 | 0.62 | 7.08 | 0 | 0.00 | 0.00 | 0.00 | 448.00 | ▇▁▁▁▁ |
| phys_ind_days_sum | 259 | 0.98 | 8.06 | 21.58 | 0 | 0.00 | 1.00 | 8.00 | 560.00 | ▇▁▁▁▁ |
| phys_team_days_sum | 322 | 0.97 | 12.60 | 22.06 | 0 | 0.00 | 4.00 | 15.50 | 367.67 | ▇▁▁▁▁ |
| art_days_sum | 263 | 0.98 | 8.85 | 26.38 | 0 | 0.00 | 0.33 | 6.00 | 600.00 | ▇▁▁▁▁ |
| sport_act_all_days_sum | 735 | 0.94 | 29.34 | 43.86 | 0 | 3.33 | 15.33 | 38.00 | 826.17 | ▇▁▁▁▁ |
| phys_ind_daypweek_sum | 93 | 0.99 | 1.84 | 2.42 | 0 | 0.00 | 1.00 | 3.00 | 44.75 | ▇▁▁▁▁ |
| phys_team_daypweek_sum | 112 | 0.99 | 3.67 | 3.75 | 0 | 0.00 | 3.00 | 6.00 | 36.00 | ▇▁▁▁▁ |
| art_daypweek_sum | 118 | 0.99 | 2.15 | 3.23 | 0 | 0.00 | 1.00 | 3.00 | 36.00 | ▇▁▁▁▁ |
| sport_act_all_daypweek_sum | 273 | 0.98 | 7.63 | 6.40 | 0 | 3.00 | 6.12 | 11.00 | 103.25 | ▇▁▁▁▁ |
# Descriptive summary of every column, restricted to the 2-year follow-up.
# NOTE(review): in the rendered output this filter matches 0 rows, so every
# statistic is NA/NaN. Confirm whether this table contains follow-up records
# at all, or whether the event label differs.
sport_act_multiplied_sum %>%
  dplyr::filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 0 |
| Number of columns | 167 |
| _______________________ | |
| Column type frequency: | |
| character | 5 |
| numeric | 162 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| SUBJECTKEY | 0 | NaN | NA | NA | 0 | 0 | 0 |
| SRC_SUBJECT_ID | 0 | NaN | NA | NA | 0 | 0 | 0 |
| INTERVIEW_DATE | 0 | NaN | NA | NA | 0 | 0 | 0 |
| SEX | 0 | NaN | NA | NA | 0 | 0 | 0 |
| EVENTNAME | 0 | NaN | NA | NA | 0 | 0 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| ABCD_SPACSS01_ID | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| DATASET_ID | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| INTERVIEW_AGE | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_DANCE_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_DANCE_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_DANCE_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_DANCE_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_BASE_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_BASE_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_BASE_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_BASE_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_BASKET_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_BASKET_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_BASKET_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_BASKET_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_CLIMB_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_CLIMB_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_CLIMB_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_CLIMB_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_FHOCK_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_FHOCK_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_FHOCK_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_FHOCK_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_FBALL_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_FBALL_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_FBALL_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_FBALL_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_GYM_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_GYM_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_GYM_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_GYM_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_IHOCK_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_IHOCK_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_IHOCK_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_IHOCK_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_POLO_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_POLO_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_POLO_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_POLO_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_ISKATE_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_ISKATE_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_ISKATE_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_ISKATE_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_M_ARTS_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_M_ARTS_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_M_ARTS_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_M_ARTS_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_LAX_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_LAX_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_LAX_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_LAX_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_RUGBY_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_RUGBY_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_RUGBY_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_RUGBY_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_SKATE_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_SKATE_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_SKATE_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_SKATE_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_SBOARD_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_SBOARD_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_SBOARD_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_SBOARD_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_SOC_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_SOC_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_SOC_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_SOC_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_SURF_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_SURF_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_SURF_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_SURF_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_WPOLO_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_WPOLO_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_WPOLO_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_WPOLO_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_TENNIS_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_TENNIS_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_TENNIS_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_TENNIS_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_RUN_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_RUN_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_RUN_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_RUN_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_MMA_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_MMA_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_MMA_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_MMA_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_VBALL_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_VBALL_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_VBALL_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_VBALL_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_YOGA_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_YOGA_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_YOGA_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_YOGA_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_MUSIC_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_MUSIC_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_MUSIC_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_MUSIC_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_ART_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_ART_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_ART_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_ART_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_DRAMA_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_DRAMA_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_DRAMA_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_DRAMA_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_CRAFTS_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_CRAFTS_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_CRAFTS_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_CRAFTS_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_CHESS_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_CHESS_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_CHESS_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_CHESS_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_COLLECT_NYR_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_COLLECT_NMONTH_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_COLLECT_PERWK_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_COLLECT_TSPENT_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_LMUSIC_YEARS_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_LMUSIC_HOURS_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_READ_YEARS_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SAI_SS_READ_HOURS_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SPORTS_ACTIVITY_SS_LMUSIC_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| SPORTS_ACTIVITY_SS_READ_P | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| dance_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| base_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| basket_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| climb_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| fball_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| fhock_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| gym_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| ihock_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| polo_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| iskate_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| m_arts_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| lax_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| rugby_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| skate_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| sboard_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| soc_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| surf_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| wpolo_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| tennis_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| run_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| mma_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| vball_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| yoga_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| music_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| art_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| drama_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| craft_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| chess_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| collect_days | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| phys_ind_days_sum | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| phys_team_days_sum | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| art_days_sum | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| sport_act_all_days_sum | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| phys_ind_daypweek_sum | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| phys_team_daypweek_sum | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| art_daypweek_sum | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| sport_act_all_daypweek_sum | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA |
Method used by Kerlic et al.: only the days-per-week items are used, and they are recoded onto an ordinal 0-10 frequency scale rather than converted to fractional days per week.
# Read the ABCD_SPACSS01 table from dataFold.
sport_act <- as_tibble(read.csv(paste0(dataFold, "ABCD_SPACSS01_DATA_TABLE.csv")))

# Days-per-week summary following Kerlic et al.: only the *_PERWK_P items are
# used. The result keeps SUBJECTKEY, EVENTNAME and the four *_daypweek_sum
# columns.
sport_act_kerlic_sum <- sport_act %>%
  # 999 = "Don't know". Recoded to 0 on the assumption that the child does not
  # do that activity.
  mutate(across(starts_with("SAI_SS_"), ~ replace(.x, which(.x == 999), 0))) %>%
  # *_PERWK_P raw coding: 0-7 = days per week; 8 = once every 2 weeks;
  # 9 = one day every month; 10 = less than one day per month.
  # Per the data-dictionary note, follow-up items of activities that were not
  # endorsed have already been set to 0.
  # Recode to an ordinal frequency rank (NOT to fractional days per week):
  #   0 = none, 1 = less than one day per month, 2 = one day every month,
  #   3 = once every 2 weeks, 4 = 1 day per week, ..., 10 = 7 days per week.
  mutate(across(
    ends_with("_PERWK_P"),
    ~ case_when(
      .x == 10 ~ 1,
      .x == 9 ~ 2,
      .x == 8 ~ 3,
      .x == 1 ~ 4,
      .x == 2 ~ 5,
      .x == 3 ~ 6,
      .x == 4 ~ 7,
      .x == 5 ~ 8,
      .x == 6 ~ 9,
      .x == 7 ~ 10,
      TRUE ~ as.numeric(.x)
    )
  )) %>%
  # Sums use `+`, so a missing value in any component makes the sum NA.
  mutate(
    phys_ind_daypweek_sum = SAI_SS_SBOARD_PERWK_P + SAI_SS_CLIMB_PERWK_P +
      SAI_SS_GYM_PERWK_P + SAI_SS_ISKATE_PERWK_P + SAI_SS_M_ARTS_PERWK_P +
      SAI_SS_SKATE_PERWK_P + SAI_SS_DANCE_PERWK_P + SAI_SS_SURF_PERWK_P +
      SAI_SS_TENNIS_PERWK_P + SAI_SS_RUN_PERWK_P + SAI_SS_MMA_PERWK_P +
      SAI_SS_YOGA_PERWK_P,
    phys_team_daypweek_sum = SAI_SS_BASE_PERWK_P + SAI_SS_BASKET_PERWK_P +
      SAI_SS_FHOCK_PERWK_P + SAI_SS_FBALL_PERWK_P + SAI_SS_IHOCK_PERWK_P +
      SAI_SS_POLO_PERWK_P + SAI_SS_LAX_PERWK_P + SAI_SS_RUGBY_PERWK_P +
      SAI_SS_SOC_PERWK_P + SAI_SS_WPOLO_PERWK_P + SAI_SS_VBALL_PERWK_P,
    art_daypweek_sum = SAI_SS_COLLECT_PERWK_P + SAI_SS_MUSIC_PERWK_P +
      SAI_SS_ART_PERWK_P + SAI_SS_DRAMA_PERWK_P + SAI_SS_CRAFTS_PERWK_P +
      SAI_SS_CHESS_PERWK_P,
    sport_act_all_daypweek_sum = phys_ind_daypweek_sum +
      phys_team_daypweek_sum + art_daypweek_sum
  ) %>%
  dplyr::select(SUBJECTKEY, EVENTNAME, ends_with("_daypweek_sum"))
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## ℹ Please use a list of either functions or lambdas:
##
## # Simple named list: list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`: tibble::lst(mean, median)
##
## # Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Summary statistics of the activity sums at baseline.
sport_act_kerlic_sum %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME), ends_with("_daypweek_sum")) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 11876 |
| Number of columns | 4 |
| _______________________ | |
| Column type frequency: | |
| numeric | 4 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| phys_ind_daypweek_sum | 93 | 0.99 | 4.89 | 5.64 | 0 | 0 | 4 | 8 | 70 | ▇▁▁▁▁ |
| phys_team_daypweek_sum | 112 | 0.99 | 8.12 | 7.60 | 0 | 0 | 6 | 13 | 54 | ▇▃▁▁▁ |
| art_daypweek_sum | 118 | 0.99 | 4.96 | 6.25 | 0 | 0 | 4 | 8 | 54 | ▇▁▁▁▁ |
| sport_act_all_daypweek_sum | 273 | 0.98 | 17.89 | 13.72 | 0 | 8 | 16 | 26 | 170 | ▇▁▁▁▁ |
# Summary statistics of the activity sums at the 2-year follow-up.
sport_act_kerlic_sum %>%
  filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME), ends_with("_daypweek_sum")) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 0 |
| Number of columns | 4 |
| _______________________ | |
| Column type frequency: | |
| numeric | 4 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| phys_ind_daypweek_sum | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| phys_team_daypweek_sum | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| art_daypweek_sum | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA | |
| sport_act_all_daypweek_sum | 0 | NaN | NaN | NA | NA | NA | NA | NA | NA |
ABCD Youth Risk Behavior Survey Exercise Physical Activity
# Youth Risk Behavior Survey item PHYSICAL_ACTIVITY1_Y, kept as `physc_act_days`:
# "During the past 7 days, on how many days were you physically active for a
# total of at least 60 minutes per day? (Add up all the time you spent in any
# kind of physical activity that increased your heart rate and made you breathe
# hard some of the time)"
phyc_act <- read.csv(paste0(dataFold, "ABCD_YRB01_DATA_TABLE.csv")) %>%
  as_tibble() %>%
  dplyr::select(SUBJECTKEY, EVENTNAME, physc_act_days = PHYSICAL_ACTIVITY1_Y)
# Summary statistics of physc_act_days at baseline.
phyc_act %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 11876 |
| Number of columns | 1 |
| _______________________ | |
| Column type frequency: | |
| numeric | 1 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| physc_act_days | 28 | 1 | 3.49 | 2.32 | 0 | 2 | 3 | 5 | 7 | ▇▅▇▅▇ |
# Summary statistics of physc_act_days at the 2-year follow-up.
phyc_act %>%
  filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 10414 |
| Number of columns | 1 |
| _______________________ | |
| Column type frequency: | |
| numeric | 1 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| physc_act_days | 28 | 1 | 3.79 | 2.15 | 0 | 2 | 4 | 5 | 7 | ▅▃▇▅▆ |
ABCD Youth Anthropometrics Modified From PhenX. The values are questionable, even after deleting outliers, so we ended up not using them.
# Read the anthropometrics table and tabulate the ANTHROWEIGHTCAST flag.
anthro <- read.csv(paste0(dataFold, "ABCD_ANT01_DATA_TABLE.csv")) %>%
  as_tibble()
anthro %>% count(ANTHROWEIGHTCAST)
## # A tibble: 3 × 2
## ANTHROWEIGHTCAST n
## <int> <int>
## 1 0 31903
## 2 1 96
## 3 NA 7767
# BMI is set to NA for rows flagged ANTHROWEIGHTCAST == 1 (measured with a
# cast, so BMI would not be accurate); rows with flag 0 or a missing flag keep
# their computed BMI.
# BMI = weight / height^2 * 703 -- presumably weight in lb and height in
# inches (the 703 factor); TODO confirm units.
bmi_waist <- anthro %>%
  # filter(ANTHROHEIGHTCALC > 30) %>% # remove those who are unusally short. Potentially error in data entering
  # filter(ANTHROWEIGHTCALC < 500) %>%
  # filter(!rstatix::is_outlier(ANTHROHEIGHTCALC) & !rstatix::is_outlier(ANTHROWEIGHTCALC)) %>%
  mutate(
    bmi = ifelse(
      is.na(ANTHROWEIGHTCAST) | ANTHROWEIGHTCAST == 0,
      (ANTHROWEIGHTCALC / (ANTHROHEIGHTCALC^2)) * 703,
      NA
    )
  ) %>%
  dplyr::select(
    SUBJECTKEY,
    EVENTNAME,
    bmi,
    waist = ANTHRO_WAIST_CM,
    ANTHROWEIGHTCALC,
    ANTHROHEIGHTCALC
  )
# Exploratory outlier checks, kept for reference (not run):
# anthro %>%
# mutate(bmi = ifelse(ANTHROWEIGHTCAST == 0 | is.na(ANTHROWEIGHTCAST),
# (ANTHROWEIGHTCALC/(ANTHROHEIGHTCALC^2))*703,NA)) %>%
# rename(waist = ANTHRO_WAIST_CM) %>%
# arrange(desc(bmi)) %>% glimpse()
# bmi_waist %>% arrange(desc(bmi)) %>% glimpse()
# bmi_waist %>% arrange(bmi) %>% glimpse()
#
# anthro %>% rstatix::identify_outliers(ANTHROHEIGHTCALC) %>% arrange(ANTHROHEIGHTCALC) %>% View()
# anthro %>% rstatix::identify_outliers(ANTHROHEIGHTCALC) %>% arrange(desc(ANTHROHEIGHTCALC)) %>% View()
# boxplot(anthro$ANTHROHEIGHTCALC)$out
# boxplot(anthro$ANTHROWEIGHTCALC)$out
#
# boxplot(bmi_waist$ANTHROWEIGHTCALC)$out
# Summary statistics of the anthropometric variables at baseline.
bmi_waist %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 11876 |
| Number of columns | 4 |
| _______________________ | |
| Column type frequency: | |
| logical | 1 |
| numeric | 3 |
| ________________________ | |
| Group variables | None |
Variable type: logical
| skim_variable | n_missing | complete_rate | mean | count |
|---|---|---|---|---|
| ANTHROWEIGHTCALC | 11876 | 0 | NaN | : |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| bmi | 11876 | 0 | NaN | NA | NA | NA | NA | NA | NA | |
| waist | 17 | 1 | 26.48 | 4.30 | 0 | 23.5 | 25.5 | 28.7 | 73 | ▁▇▂▁▁ |
| ANTHROHEIGHTCALC | 9 | 1 | 55.24 | 3.33 | 0 | 53.0 | 55.1 | 57.2 | 82 | ▁▁▁▇▁ |
# Summary statistics of the anthropometric variables at the 2-year follow-up.
bmi_waist %>%
  filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 10414 |
| Number of columns | 4 |
| _______________________ | |
| Column type frequency: | |
| logical | 1 |
| numeric | 3 |
| ________________________ | |
| Group variables | None |
Variable type: logical
| skim_variable | n_missing | complete_rate | mean | count |
|---|---|---|---|---|
| ANTHROWEIGHTCALC | 10414 | 0 | NaN | : |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| bmi | 10414 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| waist | 2892 | 0.72 | 28.77 | 4.97 | 0.00 | 25.1 | 27.75 | 31.0 | 75.0 | ▁▇▃▁▁ |
| ANTHROHEIGHTCALC | 1560 | 0.85 | 60.19 | 3.60 | 5.75 | 58.0 | 60.00 | 62.5 | 98.3 | ▁▁▇▅▁ |
Set the feature names
# Feature names, grouped by category. `features` concatenates the groups in
# the order they are defined below.
Child_Sleep <- c(
  "sleep_hours",
  "sleep_disturb",
  "sleep_initiate_maintain",
  "sleep_breath",
  "sleep_arousal",
  "sleep_transition",
  "sleep_somnolence",
  "sleep_hyperhydrosis"
)
Physical_Activity <- c(
  "phys_ind_daypweek_sum",
  "phys_team_daypweek_sum",
  "art_daypweek_sum",
  "physc_act_days"
)
Child_Screen_Use <- c(
  "matureGames_Screen",
  "matureMovies_Screen",
  "wkdySum_Screen",
  "wkndSum_Screen"
)
Parent_Drug_Use <- c(
  "tobacco_before_preg",
  "tobacco_after_preg",
  "alcohol_before_preg",
  "alcohol_after_preg",
  "marijuana_before_preg",
  "marijuana_after_preg"
)
Child_Developmental_Adversity <- c(
  "deveplopment_prematurity",
  "deveplopment_birth_complications",
  "deveplopment_pregnancy_complications"
)
Child_Socio_Demographics <- c(
  "bilingual_use",
  "marital",
  "educationAvg",
  "combinedIncome",
  "householdSize",
  "econ_insecurities_sum",
  "area_deprivation_index",
  "lead_risk",
  "quartic_uniform_crime_reports",
  "neighbo_safety_parent_sum",
  "neighbo_safety_child_sum",
  "sumSchool_environment",
  "sumSchool_involvement",
  "sumSchool_disengagement"
)
Social_Interaction <- c(
  "parent_monitor_mean",
  "fam_conflict_parent",
  "fam_conflict_children",
  "prosocial_parent_mean",
  "prosocial_youth_mean"
)
features <- c(
  Child_Sleep,
  Physical_Activity,
  Child_Screen_Use,
  Parent_Drug_Use,
  Child_Developmental_Adversity,
  Child_Socio_Demographics,
  Social_Interaction
)
# Full-join every summary table on SUBJECTKEY x EVENTNAME, then drop rows
# flagged visionProb == 1 (eyesight problems). Rows with a missing visionProb
# are kept. The helper column is removed afterwards.
all_sum_vars <- list(
  Siteinfo, ACSselected, sleepSum, youthScreenSum,
  momSubstanceUse, adversitySum, bilingualSum,
  demograpSum, ResidHistDer, NeighboSafety,
  school_risk_sum, ParentMonitoring, FamilyConflict_sum, prosocial_sum,
  sport_act_kerlic_sum, phyc_act, vision_idx
) %>%
  plyr::join_all(by = c('SUBJECTKEY', 'EVENTNAME'), type = 'full') %>%
  filter(is.na(visionProb) | visionProb != 1) %>%
  dplyr::select(-visionProb)
# Summary statistics of the merged table at baseline, skipping its first two columns.
all_sum_vars %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(-(1:2)) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 11845 |
| Number of columns | 66 |
| _______________________ | |
| Column type frequency: | |
| character | 6 |
| factor | 11 |
| numeric | 49 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| SUBJECTKEY | 0 | 1 | 12 | 16 | 0 | 11845 | 0 |
| SRC_SUBJECT_ID | 0 | 1 | 16 | 16 | 0 | 11845 | 0 |
| INTERVIEW_DATE | 0 | 1 | 9 | 9 | 0 | 756 | 0 |
| SEX | 0 | 1 | 1 | 1 | 0 | 2 | 0 |
| EVENTNAME | 0 | 1 | 21 | 21 | 0 | 1 | 0 |
| SITE_ID_L | 0 | 1 | 6 | 6 | 0 | 22 | 0 |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| REL_FAMILY_ID | 0 | 1.00 | FALSE | 9832 | 373: 5, 749: 4, 11: 3, 400: 3 |
| tobacco_before_preg | 277 | 0.98 | FALSE | 2 | 0: 9960, 1: 1608 |
| tobacco_after_preg | 263 | 0.98 | FALSE | 2 | 0: 10962, 1: 620 |
| alcohol_before_preg | 681 | 0.94 | FALSE | 2 | 0: 8289, 1: 2875 |
| alcohol_after_preg | 291 | 0.98 | FALSE | 2 | 0: 11239, 1: 315 |
| marijuana_before_preg | 337 | 0.97 | FALSE | 2 | 0: 10822, 1: 686 |
| marijuana_after_preg | 275 | 0.98 | FALSE | 2 | 0: 11326, 1: 244 |
| deveplopment_prematurity | 145 | 0.99 | FALSE | 2 | 0: 9497, 1: 2203 |
| bilingual_status | 81 | 0.99 | FALSE | 2 | 0: 7343, 1: 4421 |
| bilingual_degree | 81 | 0.99 | FALSE | 3 | 0: 7343, 2: 2743, 1: 1678 |
| marital | 96 | 0.99 | FALSE | 6 | mar: 7973, nev: 1455, div: 1077, liv: 684 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| INTERVIEW_AGE | 0 | 1.00 | 118.98 | 7.49 | 107.00 | 112.00 | 119.00 | 126.00 | 133.00 | ▇▆▆▆▆ |
| SCHED_DELAY | 13 | 1.00 | 7.00 | 0.10 | 1.00 | 7.00 | 7.00 | 7.00 | 7.00 | ▁▁▁▁▇ |
| SCHED_HYBRID | 11845 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| ACS_RAKED_PROPENSITY_SCORE | 0 | 1.00 | 691.25 | 350.96 | 161.36 | 448.94 | 619.31 | 821.72 | 1778.92 | ▅▇▂▂▁ |
| sleep_hours | 5 | 1.00 | 1.72 | 0.81 | 1.00 | 1.00 | 2.00 | 2.00 | 5.00 | ▇▆▂▁▁ |
| sleep_disturb | 5 | 1.00 | 1.93 | 0.98 | 1.00 | 1.00 | 2.00 | 2.00 | 5.00 | ▇▇▂▁▁ |
| sleep_initiate_maintain | 5 | 1.00 | 11.76 | 3.75 | 7.00 | 9.00 | 11.00 | 13.00 | 35.00 | ▇▃▁▁▁ |
| sleep_breath | 5 | 1.00 | 3.77 | 1.25 | 3.00 | 3.00 | 3.00 | 4.00 | 15.00 | ▇▁▁▁▁ |
| sleep_arousal | 5 | 1.00 | 3.44 | 0.92 | 3.00 | 3.00 | 3.00 | 4.00 | 15.00 | ▇▁▁▁▁ |
| sleep_transition | 32 | 1.00 | 8.18 | 2.63 | 6.00 | 6.00 | 7.00 | 9.00 | 30.00 | ▇▁▁▁▁ |
| sleep_somnolence | 6 | 1.00 | 6.95 | 2.44 | 5.00 | 5.00 | 6.00 | 8.00 | 25.00 | ▇▁▁▁▁ |
| sleep_hyperhydrosis | 5 | 1.00 | 2.44 | 1.18 | 2.00 | 2.00 | 2.00 | 2.00 | 10.00 | ▇▁▁▁▁ |
| sleep_total | 33 | 1.00 | 36.53 | 8.24 | 26.00 | 31.00 | 35.00 | 40.00 | 126.00 | ▇▁▁▁▁ |
| matureGames_Screen | 20 | 1.00 | 0.57 | 0.87 | 0.00 | 0.00 | 0.00 | 1.00 | 3.00 | ▇▃▁▁▁ |
| matureMovies_Screen | 21 | 1.00 | 0.38 | 0.64 | 0.00 | 0.00 | 0.00 | 1.00 | 3.00 | ▇▃▁▁▁ |
| wkdySum_Screen | 37 | 1.00 | 3.46 | 3.10 | 0.00 | 1.25 | 2.50 | 4.75 | 24.00 | ▇▂▁▁▁ |
| wkndSum_Screen | 42 | 1.00 | 4.62 | 3.63 | 0.00 | 2.00 | 3.50 | 6.25 | 24.00 | ▇▃▁▁▁ |
| deveplopment_birth_complications | 759 | 0.94 | 0.37 | 0.74 | 0.00 | 0.00 | 0.00 | 1.00 | 8.00 | ▇▁▁▁▁ |
| deveplopment_pregnancy_complications | 741 | 0.94 | 0.61 | 1.02 | 0.00 | 0.00 | 0.00 | 1.00 | 12.00 | ▇▁▁▁▁ |
| bilingual_use | 81 | 0.99 | 1.02 | 1.70 | 0.00 | 0.00 | 0.00 | 1.00 | 9.00 | ▇▂▁▁▁ |
| education1stPar | 17 | 1.00 | 16.60 | 2.77 | 1.00 | 15.00 | 18.00 | 19.00 | 21.00 | ▁▁▂▅▇ |
| education2ndPar | 2458 | 0.79 | 16.38 | 3.06 | 0.00 | 15.00 | 18.00 | 18.00 | 21.00 | ▁▁▁▅▇ |
| educationAvg | 14 | 1.00 | 16.38 | 2.70 | 3.00 | 15.00 | 17.00 | 18.50 | 21.00 | ▁▁▂▇▇ |
| combinedIncome | 1015 | 0.91 | 7.23 | 2.42 | 1.00 | 6.00 | 8.00 | 9.00 | 10.00 | ▂▂▃▆▇ |
| householdSize | 279 | 0.98 | 4.70 | 1.55 | 0.00 | 4.00 | 4.00 | 5.00 | 19.00 | ▂▇▁▁▁ |
| econ_insecurities_sum | 134 | 0.99 | 0.47 | 1.10 | 0.00 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| area_deprivation_index | 876 | 0.93 | 94.63 | 21.18 | 1.07 | 87.85 | 98.78 | 108.39 | 125.75 | ▁▁▂▇▇ |
| lead_risk | 651 | 0.95 | 5.10 | 3.11 | 1.00 | 2.00 | 5.00 | 8.00 | 10.00 | ▇▆▅▅▆ |
| quartic_uniform_crime_reports | 649 | 0.95 | 12.09 | 5.78 | 0.00 | 9.41 | 12.28 | 15.20 | 24.29 | ▂▃▇▅▁ |
| neighbo_safety_parent_sum | 47 | 1.00 | 11.67 | 2.93 | 3.00 | 10.00 | 12.00 | 14.00 | 15.00 | ▁▁▃▆▇ |
| neighbo_safety_child_sum | 24 | 1.00 | 4.03 | 1.10 | 1.00 | 3.00 | 4.00 | 5.00 | 5.00 | ▁▁▃▆▇ |
| sumSchool_environment | 27 | 1.00 | 19.93 | 2.83 | 6.00 | 18.00 | 20.00 | 22.00 | 24.00 | ▁▁▂▇▇ |
| sumSchool_involvement | 26 | 1.00 | 13.06 | 2.37 | 4.00 | 12.00 | 13.00 | 15.00 | 16.00 | ▁▁▃▅▇ |
| sumSchool_disengagement | 25 | 1.00 | 3.74 | 1.46 | 2.00 | 3.00 | 4.00 | 5.00 | 8.00 | ▇▃▃▁▁ |
| parent_monitor_mean | 23 | 1.00 | 4.38 | 0.52 | 1.00 | 4.20 | 4.40 | 4.80 | 5.00 | ▁▁▁▃▇ |
| fam_conflict_parent | 12 | 1.00 | 2.54 | 1.96 | 0.00 | 1.00 | 2.00 | 4.00 | 9.00 | ▇▇▅▂▁ |
| fam_conflict_children | 27 | 1.00 | 2.04 | 1.95 | 0.00 | 0.00 | 2.00 | 3.00 | 9.00 | ▇▅▂▁▁ |
| prosocial_parent_mean | 62 | 0.99 | 1.75 | 0.40 | 0.00 | 1.67 | 2.00 | 2.00 | 2.00 | ▁▁▁▁▇ |
| prosocial_youth_mean | 33 | 1.00 | 1.68 | 0.37 | 0.00 | 1.33 | 1.67 | 2.00 | 2.00 | ▁▁▁▂▇ |
| phys_ind_daypweek_sum | 93 | 0.99 | 4.89 | 5.64 | 0.00 | 0.00 | 4.00 | 8.00 | 70.00 | ▇▁▁▁▁ |
| phys_team_daypweek_sum | 112 | 0.99 | 8.13 | 7.61 | 0.00 | 0.00 | 6.00 | 13.00 | 54.00 | ▇▃▁▁▁ |
| art_daypweek_sum | 118 | 0.99 | 4.96 | 6.24 | 0.00 | 0.00 | 4.00 | 8.00 | 54.00 | ▇▁▁▁▁ |
| sport_act_all_daypweek_sum | 273 | 0.98 | 17.90 | 13.72 | 0.00 | 8.00 | 16.00 | 26.00 | 170.00 | ▇▁▁▁▁ |
| physc_act_days | 28 | 1.00 | 3.50 | 2.32 | 0.00 | 2.00 | 3.00 | 5.00 | 7.00 | ▇▅▇▅▇ |
| ABCD_SVS01_ID | 0 | 1.00 | 50523.16 | 6430.86 | 39373.00 | 44951.00 | 50525.00 | 56092.00 | 61660.00 | ▇▇▇▇▇ |
| SNELLEN_AID_Y | 21 | 1.00 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
| SNELLEN_AIDPRES_Y | 8987 | 0.24 | 0.62 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
| SNELLEN_VA_Y | 28 | 1.00 | 6.84 | 1.44 | 2.00 | 6.00 | 7.00 | 8.00 | 11.00 | ▁▃▇▆▁ |
| VIS_FLG | 11083 | 0.06 | 1.00 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▁▁▇▁▁ |
# Summary statistics of the merged table at the 2-year follow-up, skipping its first two columns.
all_sum_vars %>%
  filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(-(1:2)) %>%
  skimr::skim()
| Name | Piped data |
| Number of rows | 10387 |
| Number of columns | 66 |
| _______________________ | |
| Column type frequency: | |
| character | 6 |
| factor | 11 |
| numeric | 49 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| SUBJECTKEY | 0 | 1 | 12 | 16 | 0 | 10387 | 0 |
| SRC_SUBJECT_ID | 0 | 1 | 16 | 16 | 0 | 10387 | 0 |
| INTERVIEW_DATE | 0 | 1 | 9 | 9 | 0 | 788 | 0 |
| SEX | 0 | 1 | 1 | 1 | 0 | 2 | 0 |
| EVENTNAME | 0 | 1 | 24 | 24 | 0 | 1 | 0 |
| SITE_ID_L | 0 | 1 | 6 | 6 | 0 | 21 | 0 |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| REL_FAMILY_ID | 10387 | 0.00 | FALSE | 0 | 0: 0, 1: 0, 3: 0, 4: 0 |
| tobacco_before_preg | 10387 | 0.00 | FALSE | 0 | 0: 0, 1: 0 |
| tobacco_after_preg | 10387 | 0.00 | FALSE | 0 | 0: 0, 1: 0 |
| alcohol_before_preg | 10387 | 0.00 | FALSE | 0 | 0: 0, 1: 0 |
| alcohol_after_preg | 10387 | 0.00 | FALSE | 0 | 0: 0, 1: 0 |
| marijuana_before_preg | 10387 | 0.00 | FALSE | 0 | 0: 0, 1: 0 |
| marijuana_after_preg | 10387 | 0.00 | FALSE | 0 | 0: 0, 1: 0 |
| deveplopment_prematurity | 10387 | 0.00 | FALSE | 0 | 0: 0, 1: 0 |
| bilingual_status | 71 | 0.99 | FALSE | 2 | 0: 6232, 1: 4084 |
| bilingual_degree | 71 | 0.99 | FALSE | 3 | 0: 6232, 2: 2721, 1: 1363 |
| marital | 10387 | 0.00 | FALSE | 0 | mar: 0, wid: 0, div: 0, sep: 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| INTERVIEW_AGE | 0 | 1.00 | 144.04 | 7.95 | 127.00 | 137.00 | 144.00 | 151.00 | 168.00 | ▅▇▇▅▁ |
| SCHED_DELAY | 0 | 1.00 | 7.55 | 0.89 | 7.00 | 7.00 | 7.00 | 9.00 | 9.00 | ▇▁▁▁▃ |
| SCHED_HYBRID | 7674 | 0.26 | 0.49 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
| ACS_RAKED_PROPENSITY_SCORE | 10387 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| sleep_hours | 74 | 0.99 | 1.99 | 0.87 | 1.00 | 1.00 | 2.00 | 2.00 | 5.00 | ▆▇▃▁▁ |
| sleep_disturb | 74 | 0.99 | 2.05 | 1.05 | 1.00 | 1.00 | 2.00 | 3.00 | 5.00 | ▇▇▃▁▁ |
| sleep_initiate_maintain | 74 | 0.99 | 12.04 | 3.77 | 7.00 | 9.00 | 11.00 | 14.00 | 34.00 | ▇▃▁▁▁ |
| sleep_breath | 74 | 0.99 | 3.69 | 1.15 | 3.00 | 3.00 | 3.00 | 4.00 | 15.00 | ▇▁▁▁▁ |
| sleep_arousal | 74 | 0.99 | 3.31 | 0.72 | 3.00 | 3.00 | 3.00 | 3.00 | 11.00 | ▇▁▁▁▁ |
| sleep_transition | 75 | 0.99 | 7.80 | 2.43 | 6.00 | 6.00 | 7.00 | 9.00 | 28.00 | ▇▁▁▁▁ |
| sleep_somnolence | 74 | 0.99 | 7.14 | 2.59 | 5.00 | 5.00 | 6.00 | 8.00 | 25.00 | ▇▁▁▁▁ |
| sleep_hyperhydrosis | 74 | 0.99 | 2.34 | 0.99 | 2.00 | 2.00 | 2.00 | 2.00 | 10.00 | ▇▁▁▁▁ |
| sleep_total | 75 | 0.99 | 36.32 | 8.04 | 26.00 | 31.00 | 34.00 | 40.00 | 105.00 | ▇▂▁▁▁ |
| matureGames_Screen | 29 | 1.00 | 0.62 | 0.88 | 0.00 | 0.00 | 0.00 | 1.00 | 3.00 | ▇▃▁▂▁ |
| matureMovies_Screen | 37 | 1.00 | 0.49 | 0.65 | 0.00 | 0.00 | 0.00 | 1.00 | 3.00 | ▇▅▁▁▁ |
| wkdySum_Screen | 10387 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| wkndSum_Screen | 10387 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| deveplopment_birth_complications | 10387 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| deveplopment_pregnancy_complications | 10387 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| bilingual_use | 71 | 0.99 | 1.01 | 1.64 | 0.00 | 0.00 | 0.00 | 1.00 | 9.00 | ▇▂▁▁▁ |
| education1stPar | 10387 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| education2ndPar | 10387 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| educationAvg | 10387 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| combinedIncome | 10387 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| householdSize | 10387 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| econ_insecurities_sum | 10387 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| area_deprivation_index | 6957 | 0.33 | 95.81 | 17.86 | 3.39 | 88.32 | 98.72 | 107.64 | 125.75 | ▁▁▁▇▇ |
| lead_risk | 6878 | 0.34 | 4.98 | 3.12 | 1.00 | 2.00 | 5.00 | 8.00 | 10.00 | ▇▅▅▃▆ |
| quartic_uniform_crime_reports | 6877 | 0.34 | 12.03 | 5.65 | 0.00 | 9.41 | 12.28 | 15.20 | 24.29 | ▂▃▇▅▁ |
| neighbo_safety_parent_sum | 119 | 0.99 | 11.62 | 2.80 | 3.00 | 10.00 | 12.00 | 14.00 | 15.00 | ▁▁▅▆▇ |
| neighbo_safety_child_sum | 32 | 1.00 | 4.09 | 1.00 | 1.00 | 4.00 | 4.00 | 5.00 | 5.00 | ▁▁▃▆▇ |
| sumSchool_environment | 35 | 1.00 | 19.64 | 2.78 | 6.00 | 18.00 | 20.00 | 22.00 | 24.00 | ▁▁▂▇▇ |
| sumSchool_involvement | 35 | 1.00 | 12.66 | 2.33 | 4.00 | 11.00 | 13.00 | 14.00 | 16.00 | ▁▁▅▇▇ |
| sumSchool_disengagement | 35 | 1.00 | 3.99 | 1.35 | 2.00 | 3.00 | 4.00 | 5.00 | 8.00 | ▇▆▅▂▁ |
| parent_monitor_mean | 32 | 1.00 | 4.49 | 0.47 | 1.00 | 4.20 | 4.60 | 4.80 | 5.00 | ▁▁▁▂▇ |
| fam_conflict_parent | 67 | 0.99 | 2.43 | 1.97 | 0.00 | 1.00 | 2.00 | 4.00 | 9.00 | ▇▇▃▂▁ |
| fam_conflict_children | 36 | 1.00 | 1.91 | 1.82 | 0.00 | 0.00 | 1.00 | 3.00 | 9.00 | ▇▅▂▁▁ |
| prosocial_parent_mean | 86 | 0.99 | 1.72 | 0.42 | 0.00 | 1.67 | 2.00 | 2.00 | 2.00 | ▁▁▁▁▇ |
| prosocial_youth_mean | 32 | 1.00 | 1.71 | 0.37 | 0.00 | 1.33 | 2.00 | 2.00 | 2.00 | ▁▁▁▂▇ |
| phys_ind_daypweek_sum | 10387 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| phys_team_daypweek_sum | 10387 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| art_daypweek_sum | 10387 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| sport_act_all_daypweek_sum | 10387 | 0.00 | NaN | NA | NA | NA | NA | NA | NA | |
| physc_act_days | 28 | 1.00 | 3.79 | 2.15 | 0.00 | 2.00 | 4.00 | 5.00 | 7.00 | ▅▃▇▅▆ |
| ABCD_SVS01_ID | 0 | 1.00 | 50511.20 | 6441.84 | 39372.00 | 44933.00 | 50506.00 | 56094.00 | 61661.00 | ▇▇▇▇▇ |
| SNELLEN_AID_Y | 1626 | 0.84 | 0.28 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
| SNELLEN_AIDPRES_Y | 7945 | 0.24 | 0.67 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
| SNELLEN_VA_Y | 1625 | 0.84 | 7.11 | 1.53 | 2.00 | 6.00 | 7.00 | 8.00 | 11.00 | ▁▃▇▇▁ |
| VIS_FLG | 9840 | 0.05 | 1.00 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▁▁▇▁▁ |
make sure that there are no members from the same family at different sites
# Baseline rows only, followed by the number of baseline rows per site.
all_sum_vars_baseline <- filter(all_sum_vars, EVENTNAME == "baseline_year_1_arm_1")
count(all_sum_vars_baseline, SITE_ID_L)
## SITE_ID_L n
## 1 site01 405
## 2 site02 558
## 3 site03 629
## 4 site04 745
## 5 site05 377
## 6 site06 580
## 7 site07 339
## 8 site08 350
## 9 site09 433
## 10 site10 736
## 11 site11 448
## 12 site12 600
## 13 site13 726
## 14 site14 606
## 15 site15 457
## 16 site16 1010
## 17 site17 577
## 18 site18 384
## 19 site19 549
## 20 site20 701
## 21 site21 599
## 22 site22 36
# Check whether members of the same family appear at different sites.
# Builds a family x site count matrix (site22 and rows without a site are
# excluded) and takes its cross-product: a non-zero off-diagonal cell means at
# least one family has members at both of those sites.
# The original note here reports 6 such families.
# NOTE(review): the rendered matrix for this run shows only diagonal entries -- confirm.
all_sum_vars_baseline %>%
  drop_na(SITE_ID_L) %>%
  filter(SITE_ID_L != "site22") %>%
  count(REL_FAMILY_ID, SITE_ID_L) %>%
  spread(key = SITE_ID_L, value = n, fill = 0) %>%
  dplyr::select(-REL_FAMILY_ID) %>%
  as.matrix() %>%
  crossprod()
## site01 site02 site03 site04 site05 site06 site07 site08 site09 site10
## site01 495 0 0 0 0 0 0 0 0 0
## site02 0 1048 0 0 0 0 0 0 0 0
## site03 0 0 751 0 0 0 0 0 0 0
## site04 0 0 0 955 0 0 0 0 0 0
## site05 0 0 0 0 473 0 0 0 0 0
## site06 0 0 0 0 0 688 0 0 0 0
## site07 0 0 0 0 0 0 425 0 0 0
## site08 0 0 0 0 0 0 0 434 0 0
## site09 0 0 0 0 0 0 0 0 479 0
## site10 0 0 0 0 0 0 0 0 0 910
## site11 0 0 0 0 0 0 0 0 0 0
## site12 0 0 0 0 0 0 0 0 0 0
## site13 0 0 0 0 0 0 0 0 0 0
## site14 0 0 0 0 0 0 0 0 0 0
## site15 0 0 0 0 0 0 0 0 0 0
## site16 0 0 0 0 0 0 0 0 0 0
## site17 0 0 0 0 0 0 0 0 0 0
## site18 0 0 0 0 0 0 0 0 0 0
## site19 0 0 0 0 0 0 0 0 0 0
## site20 0 0 0 0 0 0 0 0 0 0
## site21 0 0 0 0 0 0 0 0 0 0
## site11 site12 site13 site14 site15 site16 site17 site18 site19 site20
## site01 0 0 0 0 0 0 0 0 0 0
## site02 0 0 0 0 0 0 0 0 0 0
## site03 0 0 0 0 0 0 0 0 0 0
## site04 0 0 0 0 0 0 0 0 0 0
## site05 0 0 0 0 0 0 0 0 0 0
## site06 0 0 0 0 0 0 0 0 0 0
## site07 0 0 0 0 0 0 0 0 0 0
## site08 0 0 0 0 0 0 0 0 0 0
## site09 0 0 0 0 0 0 0 0 0 0
## site10 0 0 0 0 0 0 0 0 0 0
## site11 562 0 0 0 0 0 0 0 0 0
## site12 0 746 0 0 0 0 0 0 0 0
## site13 0 0 888 0 0 0 0 0 0 0
## site14 0 0 0 1106 0 0 0 0 0 0
## site15 0 0 0 0 549 0 0 0 0 0
## site16 0 0 0 0 0 1394 0 0 0 0
## site17 0 0 0 0 0 0 697 0 0 0
## site18 0 0 0 0 0 0 0 448 0 0
## site19 0 0 0 0 0 0 0 0 1015 0
## site20 0 0 0 0 0 0 0 0 0 1187
## site21 0 0 0 0 0 0 0 0 0 0
## site21
## site01 0
## site02 0
## site03 0
## site04 0
## site05 0
## site06 0
## site07 0
## site08 0
## site09 0
## site10 0
## site11 0
## site12 0
## site13 0
## site14 0
## site15 0
## site16 0
## site17 0
## site18 0
## site19 0
## site20 0
## site21 723
# Flag baseline families whose members are spread over more than one site so
# that they can be removed below. site22 and rows without a site are dropped
# first. `dup` is 1 for every member of a family that spans more than one
# distinct site and 0 otherwise.
all_sum_vars_baseline_no_dup <- all_sum_vars_baseline %>%
  drop_na(SITE_ID_L) %>%
  filter(SITE_ID_L != "site22") %>%
  group_by(REL_FAMILY_ID) %>%
  # Named nest specification; the unnamed selection plus `.key` form is
  # deprecated since tidyr 1.0.0.
  nest(SITE_ID_L = SITE_ID_L) %>%
  # Within each family, count the distinct sites among its members.
  mutate(dup = as.numeric(length(unique(unlist(SITE_ID_L))) > 1)) %>%
  unnest(SITE_ID_L) %>%
  ungroup()
## Warning: Supplying `...` without names was deprecated in tidyr 1.0.0.
## ℹ Please specify a name for each selection.
## ℹ Did you want `SITE_ID_L = SITE_ID_L`?
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Families flagged as spanning more than one site at baseline.
family_exclude <- unique(
  all_sum_vars_baseline_no_dup$REL_FAMILY_ID[which(all_sum_vars_baseline_no_dup$dup == 1)]
)
# The same exclusion expressed per subject. REL_FAMILY_ID is entirely missing
# on the follow-up rows of all_sum_vars, so filtering on the family ID alone
# would leave the follow-up rows of excluded families in the data.
subject_exclude <- unique(
  all_sum_vars_baseline_no_dup$SUBJECTKEY[which(all_sum_vars_baseline_no_dup$dup == 1)]
)
# Drop the flagged families (all events), rows without a site, and site22.
all_sum_vars_no_dup <- all_sum_vars %>%
  filter(
    !REL_FAMILY_ID %in% family_exclude,
    !SUBJECTKEY %in% subject_exclude
  ) %>%
  drop_na(SITE_ID_L) %>%
  filter(SITE_ID_L != "site22")
### sanity check: features missing from the data set (expect character(0))
setdiff(features, colnames(all_sum_vars_no_dup))
## character(0)
Samples: REL_FAMILY_ID (9856 levels); SITE_ID_L (the 22nd site needs to be removed because it has too few subjects). Also make sure EVENTNAME is handled correctly.
Target: Factor analysis of psychopathology: pfactor
44 Features: soc-demo-lifestyle-dev
Features by categories:
Child Sleep (8): sleep_hours sleep_disturb sleep_initiate_maintain sleep_breath sleep_arousal sleep_transition sleep_somnolence sleep_hyperhydrosis
Physical Activity (4): phys_ind_daypweek_sum phys_team_daypweek_sum art_daypweek_sum physc_act_days
Child Screen Use (4): matureGames_Screen matureMovies_Screen wkdySum_Screen wkndSum_Screen
Parent Drug Use (6): tobacco_before_preg tobacco_after_preg alcohol_before_preg alcohol_after_preg marijuana_before_preg marijuana_after_preg
Child Developmental Adversity (3): deveplopment_prematurity deveplopment_birth_complications deveplopment_pregnancy_complications
Child Socio-Demographics (14): bilingual_use marital educationAvg combinedIncome householdSize econ_insecurities_sum area_deprivation_index lead_risk quartic_uniform_crime_reports neighbo_safety_parent_sum neighbo_safety_child_sum sumSchool_environment sumSchool_involvement sumSchool_disengagement
Social Interaction (5): parent_monitor_mean fam_conflict_parent fam_conflict_children prosocial_parent_mean prosocial_youth_mean
Set up vectors of names based on the different categories of features
# Keep only the subject-information columns and the model features.
all_features_no_dup <- dplyr::select(
  all_sum_vars_no_dup,
  all_of(subj_info),
  all_of(features)
)
## categorical features (would be turned into numeric values by the disabled step below)
factor_features <- c(
  "tobacco_before_preg",
  "tobacco_after_preg",
  "alcohol_before_preg",
  "alcohol_after_preg",
  "marijuana_before_preg",
  "marijuana_after_preg",
  "deveplopment_prematurity",
  "marital"
)
# Recode marital status to the codes 1-6.
# NOTE(review): `.default = 1` sends any level not listed here to the same
# code as `married` -- confirm that no other levels occur in the data.
all_features_no_dup_num <- all_features_no_dup %>%
  mutate(
    marital = recode_factor(
      as.factor(marital),
      married = 1,
      widowed = 2,
      divorced = 3,
      separated = 4,
      neverMarried = 5,
      livingWithPartner = 6,
      .default = 1
    )
  )
#%>%
# mutate_if(is.factor,as.numeric)
### inspect missingness separately for the baseline and follow-up rows
all_features_no_dup_num_baseline <- filter(
  all_features_no_dup_num,
  EVENTNAME == "baseline_year_1_arm_1"
)
naniar::vis_miss(all_features_no_dup_num_baseline)
all_features_no_dup_num_followup <- filter(
  all_features_no_dup_num,
  EVENTNAME == "2_year_follow_up_y_arm_1"
)
naniar::vis_miss(all_features_no_dup_num_followup)
The following variables are only found in the baseline. Those variables are:
“phys_ind_daypweek_sum”
“phys_team_daypweek_sum”
“art_daypweek_sum”
“wkdySum_Screen”
“wkndSum_Screen”
“tobacco_before_preg”
“tobacco_after_preg”
“alcohol_before_preg”
“alcohol_after_preg”
“marijuana_before_preg”
“marijuana_after_preg”
“deveplopment_prematurity”
“deveplopment_birth_complications”
“deveplopment_pregnancy_complications”
“marital”
“educationAvg”
“combinedIncome”
“householdSize”
“econ_insecurities_sum”
“area_deprivation_index”
“lead_risk”
“quartic_uniform_crime_reports”
For these variables, we carry each subject's baseline values forward unchanged into the follow-up analysis.
The following variables appear in both baseline and followup data:
“sleep_hours”
“sleep_disturb”
“sleep_initiate_maintain”
“sleep_breath”
“sleep_arousal”
“sleep_transition”
“sleep_somnolence”
“sleep_hyperhydrosis”
“physc_act_days”
“matureGames_Screen”
“matureMovies_Screen”
“bilingual_use”
“neighbo_safety_parent_sum”
“neighbo_safety_child_sum”
“sumSchool_environment”
“sumSchool_involvement”
“sumSchool_disengagement”
“parent_monitor_mean”
“fam_conflict_parent”
“fam_conflict_children”
“prosocial_parent_mean”
“prosocial_youth_mean”
# Features whose follow-up values are replaced by the subject's baseline values.
features_fix_na <- c(
  "phys_ind_daypweek_sum",
  "phys_team_daypweek_sum",
  "art_daypweek_sum",
  "wkdySum_Screen",
  "wkndSum_Screen",
  "tobacco_before_preg",
  "tobacco_after_preg",
  "alcohol_before_preg",
  "alcohol_after_preg",
  "marijuana_before_preg",
  "marijuana_after_preg",
  "deveplopment_prematurity",
  "deveplopment_birth_complications",
  "deveplopment_pregnancy_complications",
  "marital",
  "educationAvg",
  "combinedIncome",
  "householdSize",
  "econ_insecurities_sum",
  "area_deprivation_index",
  "lead_risk",
  "quartic_uniform_crime_reports"
)
# Baseline values of those features, keyed by subject and site.
all_features_no_dup_na_fix_baseline <- all_features_no_dup_num_baseline %>%
  dplyr::select(all_of(c("SUBJECTKEY", "SITE_ID_L", features_fix_na)))
# Follow-up rows with their own versions of those features removed.
all_features_no_dup_na_fix_followup <- all_features_no_dup_num_followup %>%
  dplyr::select(-all_of(features_fix_na))
# Attach the baseline values to the follow-up rows.
# NOTE(review): the join key includes SITE_ID_L, so a subject whose site differs
# between baseline and follow-up gets NA for all of these features -- confirm
# this is intended.
all_features_no_dup_na_fixed_followup <- all_features_no_dup_na_fix_followup %>%
  left_join(
    all_features_no_dup_na_fix_baseline,
    by = c("SUBJECTKEY", "SITE_ID_L")
  )
## plot the remaining missingness in the follow-up data after the fix
naniar::vis_miss(all_features_no_dup_na_fixed_followup)
# Stack baseline and fixed follow-up rows.
all_features_no_dup_na_fixed <- bind_rows(
  all_features_no_dup_num_baseline,
  all_features_no_dup_na_fixed_followup
)
Making data splits by site.
# Sorted table of the distinct sites present in the data.
site_col <- all_features_no_dup_na_fixed %>%
  distinct(SITE_ID_L) %>%
  arrange(SITE_ID_L)
site_list <- as.list(site_col$SITE_ID_L)
site_char <- as.character(unlist(site_col$SITE_ID_L))
# One split per site, built by `split_func` (defined outside this section),
# stored in a list named by site.
split_list <- purrr::map(
  site_list,
  function(site) split_func(site, data_input = all_features_no_dup_na_fixed)
)
names(split_list) <- site_char
Join features and response across sites
# Join the features with the gfactor response for one site.
#
# site_input: name of the site; used to index `split_list` and the four
#   `*_gfactor` lists, all of which are read from the calling environment.
#
# Returns a named list of eight data frames: the joined train/test tables for
# baseline and follow-up (`baseline_train`, `baseline_test`, `followup_train`,
# `followup_test`) and the same four tables with the `subj_info` columns
# removed (suffix `_select`).
feature_resp_join <- function(site_input) {
  site_split <- split_list[[site_input]]
  train_rows <- training(site_split)
  test_rows <- testing(site_split)

  # Keep one event's feature rows, full-join the response on SUBJECTKEY and
  # drop rows whose gfactor is missing.
  join_response <- function(feature_rows, event_label_value, response_rows) {
    feature_rows %>%
      filter(EVENTNAME == event_label_value) %>%
      full_join(response_rows, by = "SUBJECTKEY") %>%
      drop_na("gfactor")
  }

  # Remove the subject-information columns.
  strip_subj_info <- function(joined_rows) {
    dplyr::select(joined_rows, -all_of(subj_info))
  }

  baseline_event <- "baseline_year_1_arm_1"
  followup_event <- "2_year_follow_up_y_arm_1"

  baseline_train_all <- join_response(
    train_rows, baseline_event, baseline_train_gfactor[[site_input]]
  )
  baseline_test_all <- join_response(
    test_rows, baseline_event, baseline_test_gfactor[[site_input]]
  )
  followup_train_all <- join_response(
    train_rows, followup_event, followup_train_gfactor[[site_input]]
  )
  followup_test_all <- join_response(
    test_rows, followup_event, followup_test_gfactor[[site_input]]
  )

  list(
    baseline_train = baseline_train_all,
    baseline_test = baseline_test_all,
    followup_train = followup_train_all,
    followup_test = followup_test_all,
    baseline_train_select = strip_subj_info(baseline_train_all),
    baseline_test_select = strip_subj_info(baseline_test_all),
    followup_train_select = strip_subj_info(followup_train_all),
    followup_test_select = strip_subj_info(followup_test_all)
  )
}
## run the join for every site and name the results by site
gfactor_ses_split_list <- purrr::map(
  site_char,
  function(site) feature_resp_join(site_input = site)
)
names(gfactor_ses_split_list) <- site_char
## pull each table type out into its own per-site list
gfactor_ses_baseline_train <- purrr::map(gfactor_ses_split_list, ~ .x[["baseline_train"]])
gfactor_ses_baseline_test <- purrr::map(gfactor_ses_split_list, ~ .x[["baseline_test"]])
gfactor_ses_followup_train <- purrr::map(gfactor_ses_split_list, ~ .x[["followup_train"]])
gfactor_ses_followup_test <- purrr::map(gfactor_ses_split_list, ~ .x[["followup_test"]])
gfactor_ses_baseline_train_select <- purrr::map(gfactor_ses_split_list, ~ .x[["baseline_train_select"]])
gfactor_ses_baseline_test_select <- purrr::map(gfactor_ses_split_list, ~ .x[["baseline_test_select"]])
gfactor_ses_followup_train_select <- purrr::map(gfactor_ses_split_list, ~ .x[["followup_train_select"]])
gfactor_ses_followup_test_select <- purrr::map(gfactor_ses_split_list, ~ .x[["followup_test_select"]])
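This function relies on `select` and `map`, whose names conflict with the PLS packages once those are loaded. Either replace them with the namespaced versions (`dplyr::select`, `purrr::map`) in the R functions file, or run this step without loading the PLS packages.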
The process of data preparation is as follows
## process the four tables of every site together; the list names match the
## argument names of the processing function, so pmap passes them by name
processed_features_gfactor_ses_list <- purrr::pmap(
  list(
    baseline_train = gfactor_ses_baseline_train,
    baseline_test = gfactor_ses_baseline_test,
    followup_train = gfactor_ses_followup_train,
    followup_test = gfactor_ses_followup_test
  ),
  data_processing_cross_sites_seperate_dummy
)
save and load the processed datasets
Some of the factor features are all zero in the test sets of certain sites. The recipe removes them because they have a standard deviation of 0, so the corresponding columns are missing from the processed test sets. When the fitted model is applied to such a test set, the missing columns cause the model fitting function to fail.
The recipe function never imputes these removed columns, so we add them back manually: the missing columns (including any NAs in them) are replaced with 0.
We found that (1) the test observations for site 3 have no non-zero crime report values, and (2) the test observations for site 7 include no widowers.
### find the features
## column names of the processed baseline training and test tables for site03
train_colnames_site03 <- processed_features_gfactor_ses_list[["site03"]][["output_train_baseline"]]%>% colnames()
test_colnames_site03 <- processed_features_gfactor_ses_list[["site03"]][["output_test_baseline"]]%>% colnames()
## columns that are in the training table but absent from the test table
site03_add <- setdiff(train_colnames_site03,test_colnames_site03)
site03_add
## [1] "quartic_uniform_crime_reports"
### check the original data sets
## print that column from the unprocessed site03 baseline test table
gfactor_ses_baseline_test$site03[[site03_add]]
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [26] 0 0 0 0 0 0 0 0 0 NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [51] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [76] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [101] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 NA 0 0 0 0 0 0 0 0 0 0
## [126] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [151] 0 0 0 0 0 0 0 0 0 0 0 0 0 NA 0 0 0 0 0 0 0 0 0 0 0
## [176] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [201] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [226] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [251] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [276] 0 0 0 0 0 0 0 0 NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [301] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [326] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 NA 0 0 0 0 0
## [351] 0 0 0 0 0 0 0 0 0 0 NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [376] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [401] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [426] 0 0 0 NA 0 0 0 0 0 0 0 0 0 0 0 0 NA 0 0 0 0 0 0 0 NA
## [451] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [476] 0 0 0 0 0 0 0 0 0 0 NA 0 NA 0 0 0 0 0 0 0 0 0 0 NA 0
## [501] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [526] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [551] 0 0 NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [576] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
### manually fix baseline and followup data
## add the column that is missing from the site03 test tables, filled with 0
processed_features_gfactor_ses_list[["site03"]][["output_test_baseline"]] <- processed_features_gfactor_ses_list[["site03"]][["output_test_baseline"]]%>% mutate(quartic_uniform_crime_reports=0)
processed_features_gfactor_ses_list[["site03"]][["output_test_followup"]] <- processed_features_gfactor_ses_list[["site03"]][["output_test_followup"]]%>% mutate(quartic_uniform_crime_reports=0)
### do the same thing for site07, site20
## NOTE(review): only site07 is patched below; no site20 patch is visible in
## this section -- confirm whether one is still needed.
### find the features
## column names of the processed baseline training and test tables for site07
train_colnames_site07 <- processed_features_gfactor_ses_list[["site07"]][["output_train_baseline"]]%>% colnames()
test_colnames_site07 <- processed_features_gfactor_ses_list[["site07"]][["output_test_baseline"]]%>% colnames()
## columns that are in the training table but absent from the test table
site07_add <- setdiff(train_colnames_site07,test_colnames_site07)
site07_add
## [1] "marital_X2"
### check the original data sets
## positions of marital == 2 in the unprocessed site07 baseline test table
which(gfactor_ses_baseline_test$site07[["marital"]]==2)
## integer(0)
### manually fix baseline and followup data
## add the missing dummy column to the site07 test tables, filled with 0
processed_features_gfactor_ses_list[["site07"]][["output_test_baseline"]] <- processed_features_gfactor_ses_list[["site07"]][["output_test_baseline"]]%>% mutate(marital_X2=0)
processed_features_gfactor_ses_list[["site07"]][["output_test_followup"]] <- processed_features_gfactor_ses_list[["site07"]][["output_test_followup"]]%>% mutate(marital_X2=0)
extract the processed datasets
## per-site processed tables, one list per fold and time point
processed_ses_baseline_train <- purrr::map(processed_features_gfactor_ses_list, ~ .x[["output_train_baseline"]])
processed_ses_baseline_test <- purrr::map(processed_features_gfactor_ses_list, ~ .x[["output_test_baseline"]])
processed_ses_followup_train <- purrr::map(processed_features_gfactor_ses_list, ~ .x[["output_train_followup"]])
processed_ses_followup_test <- purrr::map(processed_features_gfactor_ses_list, ~ .x[["output_test_followup"]])
## the same tables without the subject-information columns
processed_ses_baseline_train_select <- purrr::map(
  processed_ses_baseline_train,
  function(tbl) dplyr::select(tbl, -all_of(subj_info))
)
processed_ses_baseline_test_select <- purrr::map(
  processed_ses_baseline_test,
  function(tbl) dplyr::select(tbl, -all_of(subj_info))
)
processed_ses_followup_train_select <- purrr::map(
  processed_ses_followup_train,
  function(tbl) dplyr::select(tbl, -all_of(subj_info))
)
processed_ses_followup_test_select <- purrr::map(
  processed_ses_followup_test,
  function(tbl) dplyr::select(tbl, -all_of(subj_info))
)
### fit the PLS model (one fit per site split)
### baseline
## feature columns: every column of the processed training table except the response
dummy_features <- colnames(
  dplyr::select(processed_ses_baseline_train_select[[1]], -"gfactor")
)
ses_baseline_recipe_list <- purrr::map(
  processed_ses_baseline_train_select,
  function(train_tbl) recipe_prep(train_input = train_tbl, features_input = dummy_features)
)
ses_pls_fit_baseline <- purrr::map(
  ses_baseline_recipe_list,
  function(site_recipe) pls_tune(recipe_input = site_recipe, feature_input = dummy_features)
)
ses_pls_fit_baseline_wf <- purrr::map(ses_pls_fit_baseline, ~ .x[["pls_final_wf"]])
## final fit evaluated on the test tables
ses_pls_model_fit_baseline <- purrr::pmap(
  list(ses_baseline_recipe_list,
       ses_pls_fit_baseline_wf,
       processed_ses_baseline_test_select),
  function(site_recipe, site_wf, eval_tbl) {
    model_final_fit(recipe_input = site_recipe,
                    wf_input = site_wf,
                    test_data = eval_tbl)
  }
)
ses_pls_pred_baseline <- purrr::map(ses_pls_model_fit_baseline, ~ .x[["model_predict"]])
## same final fit evaluated on the training tables
ses_pls_model_fit_baseline_train <- purrr::pmap(
  list(ses_baseline_recipe_list,
       ses_pls_fit_baseline_wf,
       processed_ses_baseline_train_select),
  function(site_recipe, site_wf, eval_tbl) {
    model_final_fit(recipe_input = site_recipe,
                    wf_input = site_wf,
                    test_data = eval_tbl)
  }
)
ses_pls_pred_baseline_train <- purrr::map(ses_pls_model_fit_baseline_train, ~ .x[["model_predict"]])
## per-site test metrics stacked into one table
ses_baseline_metric <- purrr::map2(
  ses_pls_pred_baseline,
  processed_ses_baseline_test,
  function(site_pred, site_tbl) metric_compute_site(data_input = site_pred, site_input = site_tbl)
) %>%
  do.call(rbind, .)
## Warning: Use of .data in tidyselect expressions was deprecated in tidyselect 1.2.0.
## ℹ Please use `"gfactor"` instead of `.data$gfactor`
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Use of .data in tidyselect expressions was deprecated in tidyselect 1.2.0.
## ℹ Please use `"model_predict"` instead of `.data$model_predict`
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## render the per-site baseline metrics as an HTML table
## (FALSE is spelled out: `F` is an ordinary variable that can be reassigned)
ses_baseline_metric %>%
  kableExtra::kbl(caption = "metrics for all sites in baseline") %>%
  kableExtra::kable_classic(full_width = FALSE,
                            html_font = "Cambria")
| correlation | tradrsq | MAE | RMSE | site |
|---|---|---|---|---|
| 0.5296415 | 0.2799987 | 0.6674212 | 0.8473496 | site01 |
| 0.4190442 | 0.1738486 | 0.7130802 | 0.9080960 | site02 |
| 0.4256129 | 0.1798805 | 0.7140928 | 0.9048470 | site03 |
| 0.4605481 | 0.2073923 | 0.7073440 | 0.8896793 | site04 |
| 0.5315242 | 0.2817477 | 0.6726602 | 0.8463295 | site05 |
| 0.4110856 | 0.1633670 | 0.7213300 | 0.9138609 | site06 |
| 0.6184235 | 0.3801687 | 0.6171497 | 0.7860778 | site07 |
| 0.4615650 | 0.2100628 | 0.7000236 | 0.8874722 | site08 |
| 0.4282846 | 0.1808457 | 0.7177252 | 0.9039192 | site09 |
| 0.5028080 | 0.2527864 | 0.6742314 | 0.8638105 | site10 |
| 0.5056590 | 0.2527789 | 0.6734357 | 0.8634277 | site11 |
| 0.6789290 | 0.4495558 | 0.5814396 | 0.7412934 | site12 |
| 0.4856282 | 0.2338633 | 0.6846422 | 0.8746792 | site13 |
| 0.3714416 | 0.1290946 | 0.7449125 | 0.9324399 | site14 |
| 0.5399691 | 0.2913159 | 0.6641652 | 0.8408262 | site15 |
| 0.3593052 | 0.1130117 | 0.7340652 | 0.9413209 | site16 |
| 0.4069269 | 0.1549192 | 0.7415909 | 0.9184467 | site17 |
| 0.4333639 | 0.1773149 | 0.7089887 | 0.9058161 | site18 |
| 0.5454390 | 0.2961545 | 0.6714145 | 0.8381401 | site19 |
| 0.5431077 | 0.2949598 | 0.6648759 | 0.8390482 | site20 |
| 0.5461596 | 0.2980361 | 0.6480221 | 0.8370681 | site21 |
## average the per-site baseline metrics (average_metric_one_mod is defined elsewhere)
ses_baseline_metric_avg <- average_metric_one_mod(metric_list = ses_baseline_metric)
## column names of the formatted "mean (sd)" table; reused for the follow-up table
avg_table_var_names <- c("correlation (sd)", "tradrsq (sd)", "MAE (sd)", "RMSE (sd)")
## round every numeric column, paste each metric with its sd, and keep the
## character columns; across()/where() replace the superseded mutate_if()/select_if()
ses_baseline_metric_avg_table <- ses_baseline_metric_avg %>%
  mutate(across(where(is.numeric), ~ round(.x, digits = 3))) %>%
  mutate("correlation (sd)" = paste0(correlation, " (", cor_sd, ")")) %>%
  mutate("tradrsq (sd)" = paste0(tradrsq, " (", rsq_sd, ")")) %>%
  mutate("MAE (sd)" = paste0(MAE, " (", mae_sd, ")")) %>%
  mutate("RMSE (sd)" = paste0(RMSE, " (", rmse_sd, ")")) %>%
  dplyr::select(where(is.character))
ses_baseline_metric_avg_table %>%
  dplyr::select(all_of(avg_table_var_names)) %>%
  kableExtra::kbl(caption = paste0("metrics for modalities averaged across sites in baseline")) %>%
  kableExtra::kable_classic(full_width = FALSE,
                            html_font = "Cambria")
| correlation (sd) | tradrsq (sd) | MAE (sd) | RMSE (sd) |
|---|---|---|---|
| 0.486 (0.08) | 0.238 (0.083) | 0.687 (0.041) | 0.871 (0.049) |
### fit the PLS model (one fit per site split)
### followup
## recipes and tuning use the same feature set as the baseline models
ses_followup_recipe_list <- purrr::map(.x = processed_ses_followup_train_select,
                                       ~recipe_prep(train_input = .x, features_input = dummy_features))
ses_pls_fit_followup <- purrr::map(.x = ses_followup_recipe_list,
                                   ~pls_tune(recipe_input = .x, feature_input = dummy_features))
ses_pls_fit_followup_wf <- purrr::map(ses_pls_fit_followup, "pls_final_wf")
## final fit evaluated on the test tables; pmap is namespace-qualified to
## match the baseline section and to stay independent of package load order
ses_pls_model_fit_followup <- purrr::pmap(list(ses_followup_recipe_list,
                                               ses_pls_fit_followup_wf,
                                               processed_ses_followup_test_select),
                                          ~model_final_fit(recipe_input = ..1,
                                                           wf_input = ..2,
                                                           test_data = ..3))
ses_pls_pred_followup <- purrr::map(ses_pls_model_fit_followup, "model_predict")
## same final fit evaluated on the training tables
ses_pls_model_fit_followup_train <- purrr::pmap(list(ses_followup_recipe_list,
                                                     ses_pls_fit_followup_wf,
                                                     processed_ses_followup_train_select),
                                                ~model_final_fit(recipe_input = ..1,
                                                                 wf_input = ..2,
                                                                 test_data = ..3))
ses_pls_pred_followup_train <- purrr::map(ses_pls_model_fit_followup_train, "model_predict")
## per-site test metrics stacked into one table
ses_followup_metric <- purrr::map2(.x = ses_pls_pred_followup,
                                   .y = processed_ses_followup_test,
                                   ~metric_compute_site(data_input = .x,
                                                        site_input = .y)) %>%
  do.call(rbind, .)
## FALSE is spelled out: `F` is an ordinary variable that can be reassigned
ses_followup_metric %>%
  kableExtra::kbl(caption = "metrics for all sites in followup") %>%
  kableExtra::kable_classic(full_width = FALSE,
                            html_font = "Cambria")
| correlation | tradrsq | MAE | RMSE | site |
|---|---|---|---|---|
| 0.5609364 | 0.3099483 | 0.6578762 | 0.8287038 | site01 |
| 0.3933879 | 0.1465595 | 0.7423183 | 0.9226148 | site02 |
| 0.4693260 | 0.2192417 | 0.7112744 | 0.8823974 | site03 |
| 0.4848541 | 0.2324355 | 0.6873146 | 0.8753572 | site04 |
| 0.4999528 | 0.2419532 | 0.7086454 | 0.8687557 | site05 |
| 0.3927633 | 0.1489401 | 0.7207011 | 0.9213176 | site06 |
| 0.5975769 | 0.3531784 | 0.6402928 | 0.8011529 | site07 |
| 0.4031141 | 0.1550298 | 0.7219744 | 0.9168504 | site08 |
| 0.3874066 | 0.1368465 | 0.7304625 | 0.9269844 | site09 |
| 0.4869327 | 0.2267327 | 0.6859717 | 0.8784987 | site10 |
| 0.4715110 | 0.2112905 | 0.7035412 | 0.8858013 | site11 |
| 0.6377458 | 0.3774529 | 0.6222828 | 0.7876826 | site12 |
| 0.4072029 | 0.1456935 | 0.7135677 | 0.9231990 | site13 |
| 0.4178285 | 0.1636509 | 0.7234086 | 0.9134719 | site14 |
| 0.6179106 | 0.3770292 | 0.6136548 | 0.7878166 | site15 |
| 0.3582303 | 0.1101277 | 0.7489837 | 0.9425826 | site16 |
| 0.2845979 | 0.0469678 | 0.7740788 | 0.9748955 | site17 |
| 0.4256326 | 0.1699855 | 0.7334627 | 0.9090941 | site18 |
| 0.5330800 | 0.2836585 | 0.6571195 | 0.8452189 | site19 |
| 0.5625736 | 0.3097989 | 0.6645845 | 0.8300251 | site20 |
| 0.4699542 | 0.2125067 | 0.6706179 | 0.8862580 | site21 |
## average the per-site follow-up metrics (average_metric_one_mod is defined elsewhere)
ses_followup_metric_avg <- average_metric_one_mod(metric_list = ses_followup_metric)
## round every numeric column, paste each metric with its sd, and keep the
## character columns; across()/where() replace the superseded mutate_if()/select_if()
ses_followup_metric_avg_table <- ses_followup_metric_avg %>%
  mutate(across(where(is.numeric), ~ round(.x, digits = 3))) %>%
  mutate("correlation (sd)" = paste0(correlation, " (", cor_sd, ")")) %>%
  mutate("tradrsq (sd)" = paste0(tradrsq, " (", rsq_sd, ")")) %>%
  mutate("MAE (sd)" = paste0(MAE, " (", mae_sd, ")")) %>%
  mutate("RMSE (sd)" = paste0(RMSE, " (", rmse_sd, ")")) %>%
  dplyr::select(where(is.character))
ses_followup_metric_avg_table %>%
  dplyr::select(all_of(avg_table_var_names)) %>%
  kableExtra::kbl(caption = paste0("metrics for modalities averaged across sites in followup")) %>%
  kableExtra::kable_classic(full_width = FALSE,
                            html_font = "Cambria")
| correlation (sd) | tradrsq (sd) | MAE (sd) | RMSE (sd) |
|---|---|---|---|
| 0.47 (0.091) | 0.218 (0.09) | 0.697 (0.043) | 0.881 (0.052) |
Baseline
## per-site tuning grid and selected parameters from the baseline tuning results
ses_pls_grid_baseline <- purrr::map(ses_pls_fit_baseline, ~ .x[["pls_grid"]])
ses_pls_param_baseline <- purrr::map(ses_pls_fit_baseline, ~ .x[["best_pls_model"]])
## Plot the tuning metrics against the number of PLS components.
##
## grid_input:  tuning results; passed to collect_metrics(), whose output must
##              provide the columns num_comp, mean and .metric.
## param_input: selected parameters; only its `num_comp` element is read and
##              marked with a vertical line.
## Returns a ggplot object with one facet row per metric.
## NOTE(review): the title mentions RMSE only, although every collected metric
## gets its own facet -- confirm the intended wording.
factor_metric_plot <- function(grid_input, param_input){
  selected_comp <- param_input$num_comp
  comp_plot <- grid_input %>%
    collect_metrics() %>%
    ggplot(aes(num_comp, mean, col = .metric)) +
    geom_point() +
    geom_line() +
    ## `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0)
    geom_vline(xintercept = selected_comp, linewidth = 1.5) +
    scale_x_continuous(n.breaks = 26) +
    labs(x = "Number of components",
         y = "Indicator",
         title = "Plot of RMSE vs number of components ",
         subtitle = paste0("Optimal number of components is ", selected_comp)) +
    facet_grid(.metric ~ .) +
    theme_few() +
    theme(legend.position = "none")
  return(comp_plot)
}
## one tuning plot per site, pairing each grid with its selected parameters
comp_metric_plot_baseline <- purrr::map2(
  ses_pls_grid_baseline,
  ses_pls_param_baseline,
  function(site_grid, site_param) {
    factor_metric_plot(grid_input = site_grid, param_input = site_param)
  }
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
comp_metric_plot_baseline
## $site01
##
## $site02
##
## $site03
##
## $site04
##
## $site05
##
## $site06
##
## $site07
##
## $site08
##
## $site09
##
## $site10
##
## $site11
##
## $site12
##
## $site13
##
## $site14
##
## $site15
##
## $site16
##
## $site17
##
## $site18
##
## $site19
##
## $site20
##
## $site21
Followup
## per-site tuning grid and selected parameters from the follow-up tuning results
ses_pls_grid_followup <- purrr::map(ses_pls_fit_followup, ~ .x[["pls_grid"]])
ses_pls_param_followup <- purrr::map(ses_pls_fit_followup, ~ .x[["best_pls_model"]])
## one tuning plot per site
comp_metric_plot_followup <- purrr::map2(
  ses_pls_grid_followup,
  ses_pls_param_followup,
  function(site_grid, site_param) {
    factor_metric_plot(grid_input = site_grid, param_input = site_param)
  }
)
comp_metric_plot_followup
## $site01
##
## $site02
##
## $site03
##
## $site04
##
## $site05
##
## $site06
##
## $site07
##
## $site08
##
## $site09
##
## $site10
##
## $site11
##
## $site12
##
## $site13
##
## $site14
##
## $site15
##
## $site16
##
## $site17
##
## $site18
##
## $site19
##
## $site20
##
## $site21
Loading tables for plotting
## lookup table that maps feature column names to display names
plotting_names <- read.csv(
  paste0(scriptfold, "Common_psy_gene_brain_all/NonBrainFeaturesRead.csv")
)
## clean plotting names
## rows 72-76 are overwritten with the marital dummy-column names
plotting_names[72:76, 1] <- paste0("marital_X", 2:6)
names(plotting_names) <- c("feature_names", "plotting_name")
Model across all sites.
### combine the data set with the same train and test fold
## the train and test tables of the first site split are stacked into one table
data_all_site_baseline <- rbind(processed_ses_baseline_train_select[[1]],
                                processed_ses_baseline_test_select[[1]])
## retune the model
all_data_recipe_baseline <- recipe_prep_scale(train_input = data_all_site_baseline,
                                              features_input = dummy_features)
all_data_fit_baseline <- pls_tune(recipe_input = all_data_recipe_baseline,
                                  feature_input = dummy_features)
all_data_wf_baseline <- all_data_fit_baseline[["pls_final_wf"]]
## final fit the model
## only the parsnip model spec is taken from the tuned workflow and refit on
## the combined table with the formula gfactor ~ .
all_data_final_fit_baseline <- all_data_wf_baseline %>%
  parsnip::extract_spec_parsnip() %>%
  parsnip::fit(data = data_all_site_baseline, formula = as.formula("gfactor~."))
## get the coefficients
tidy_all_data_final_fit_baseline <- all_data_final_fit_baseline %>%
  tidy()
### extract the number of components
all_data_param_baseline <- all_data_fit_baseline[["best_pls_model"]][["num_comp"]]
### extract the variance explained by each component
var_explained <- all_data_final_fit_baseline[["fit"]][["prop_expl_var"]][["X"]]
### plotting feature importance based on the model with all the data
## seq_len() instead of 1:n so the index can never count backwards
comp_idx_vec <- seq_len(all_data_param_baseline)
tidy_all_data_final_fit_baseline <- tidy_all_data_final_fit_baseline %>%
  rename(feature_names = term)
## attach the display names, drop the outcome rows and any row with an NA
## (e.g. features that have no display name)
tidy_all_data_final_fit_baseline_with_name <- full_join(tidy_all_data_final_fit_baseline,
                                                        plotting_names, by = "feature_names") %>%
  filter(feature_names != "Y") %>% # outcome variable col name
  drop_na()
## one table (display name, value) per component
tidy_all_data_final_fit_baseline_list <- purrr::map(comp_idx_vec,
                                                    ~filter(tidy_all_data_final_fit_baseline_with_name,
                                                            component == .) %>%
                                                      dplyr::select(all_of(c("plotting_name", "value"))))
### get the variable order from the first component:
tidy_all_data_final_fit_baseline_reordered <- tidy_all_data_final_fit_baseline_list[[1]] %>%
  arrange(value)
tidy_all_data_final_fit_baseline_reordered <- tidy_all_data_final_fit_baseline_reordered$plotting_name
## Horizontal bar plot of one PLS component, with the feature names on the y axis.
##
## data_input:   table with the columns `plotting_name` and `value`.
## var_input:    proportion of variance explained; shown in the title as a
##               percentage (rounded to 3 decimals, then multiplied by 100).
## idx_input:    component index shown in the title.
## reorder_name: display names in the order used for the factor levels of
##               `plotting_name`; names not listed become NA.
## Returns a ggplot object.
## The defaults refer to objects in the calling script and are only evaluated
## when the corresponding argument is not supplied.
pls_vi_plot_with_label <- function(data_input=tidy_all_data_final_fit_baseline_list[[1]],
                                   var_input = var_explained[1],
                                   idx_input = comp_idx_vec[1],
                                   reorder_name = tidy_all_data_final_fit_baseline_reordered){
  ### order the features by the supplied level order
  data_input <- data_input %>%
    mutate(plotting_name = factor(plotting_name, levels = reorder_name))
  range_value <- range(data_input$value, na.rm = TRUE)
  ## pad the axis by 0.05 on both sides; the limits must always contain 0,
  ## otherwise the scale censors the bars (which start at 0) and nothing is drawn
  x_lower <- min(round(range_value[1], 2) - 0.05, 0)
  x_upper <- max(round(range_value[2], 2) + 0.05, 0)
  var_title_medium <- paste0("comp ", idx_input," \n ",round(var_input,3)*100,"%")
  bar_plot <- ggplot(data_input, aes(x=.data[["value"]], y=plotting_name)) +
    geom_bar(stat="identity")+
    theme_classic() +
    scale_x_continuous(limits = c(x_lower, x_upper),
                       breaks = unique(c(x_lower, 0, x_upper)))+
    labs(title = var_title_medium)+
    theme(
      axis.title.x = element_blank(),
      axis.text.x = element_text(size = 20,angle = 60,vjust = 0.5),
      axis.title.y = element_blank(),
      axis.text.y = element_text(size = 20),
      legend.text = element_blank(),
      plot.title = element_text(size=20))
  return(bar_plot)
}
## first component, drawn with the feature labels on the y axis
comp_one_plot <- pls_vi_plot_with_label(
  data_input = tidy_all_data_final_fit_baseline_list[[1]],
  var_input = var_explained[1],
  idx_input = comp_idx_vec[1]
)
## Horizontal bar plot of one PLS component without y-axis labels or ticks,
## meant to sit next to a labelled plot that uses the same feature order.
##
## data_input:   table with the columns `plotting_name` and `value`.
## var_input:    proportion of variance explained; shown in the title as a
##               percentage (rounded to 3 decimals, then multiplied by 100).
## idx_input:    component index shown in the title.
## reorder_name: display names in the order used for the factor levels of
##               `plotting_name`; names not listed become NA.
## Returns a ggplot object.
## The defaults refer to objects in the calling script and are only evaluated
## when the corresponding argument is not supplied.
pls_vi_plot_no_label <- function(data_input=tidy_all_data_final_fit_baseline_list[[2]],
                                 var_input = var_explained[2],
                                 idx_input = comp_idx_vec[2],
                                 reorder_name = tidy_all_data_final_fit_baseline_reordered){
  ### order the features by the supplied level order
  data_input <- data_input %>%
    mutate(plotting_name = factor(plotting_name, levels = reorder_name))
  range_value <- range(data_input$value, na.rm = TRUE)
  ## pad the axis by 0.05 on both sides; the limits must always contain 0,
  ## otherwise the scale censors the bars (which start at 0) and nothing is drawn
  x_lower <- min(round(range_value[1], 2) - 0.05, 0)
  x_upper <- max(round(range_value[2], 2) + 0.05, 0)
  var_title_medium <- paste0("comp ", idx_input," \n ",round(var_input,3)*100,"%")
  bar_plot <- ggplot(data_input, aes(x=.data[["value"]], y=plotting_name)) +
    geom_bar(stat="identity")+
    scale_x_continuous(limits = c(x_lower, x_upper),
                       breaks = unique(c(x_lower, 0, x_upper)))+
    theme_classic() +
    labs(title = var_title_medium)+
    theme(
      axis.title.x = element_blank(),
      axis.text.x = element_text(size = 20,angle = 60,vjust = 0.5),
      axis.title.y = element_blank(),
      axis.text.y = element_blank(),
      legend.text = element_blank(),
      plot.title = element_text(size=20),
      axis.ticks = element_blank())
  return(bar_plot)
}
## components 2..n, drawn without y-axis labels
comp_other_plots <- purrr::pmap(
  list(tidy_all_data_final_fit_baseline_list[2:all_data_param_baseline],
       var_explained[2:all_data_param_baseline],
       comp_idx_vec[2:all_data_param_baseline]),
  function(comp_tbl, comp_var, comp_idx) {
    pls_vi_plot_no_label(data_input = comp_tbl,
                         var_input = comp_var,
                         idx_input = comp_idx)
  }
)
## put the unlabelled plots in one row, then place them right of the labelled one
comp_other_plots_combined <- ggpubr::ggarrange(
  plotlist = comp_other_plots,
  nrow = 1,
  ncol = length(var_explained) - 1
)
comp_plots_all <- gridExtra::grid.arrange(
  comp_one_plot,
  comp_other_plots_combined,
  nrow = 1, ncol = 2, widths = c(4.5, 4)
)
comp_plots_all
## TableGrob (1 x 2) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## stack the train and test tables of the first site split
corr_baseline_train <- processed_ses_baseline_train_select[[1]]
corr_baseline_test <- processed_ses_baseline_test_select[[1]]
corr_data_all <- rbind(corr_baseline_train, corr_baseline_test)
corr_features <- corr_data_all %>% dplyr::select(-"gfactor") %>% colnames()
## Pearson correlation of every feature with gfactor, as a plain numeric vector
corr_all_features <- unname(purrr::map_dbl(
  corr_features,
  ~cor(corr_data_all[[.x]], corr_data_all[["gfactor"]])
))
corr_all_features_cor_test <- purrr::map(.x = corr_features,
                                         ~cor.test(corr_data_all[[.x]], corr_data_all[["gfactor"]], method = "pearson"))
## lower/upper confidence limits from each test; the columns are named
## directly instead of relying on as_tibble()'s deprecated auto-naming (V1, V2)
## of an unnamed matrix
corr_all_features_ci <- tibble::tibble(
  low = purrr::map_dbl(corr_all_features_cor_test, ~ .x[["conf.int"]][1]),
  upp = purrr::map_dbl(corr_all_features_cor_test, ~ .x[["conf.int"]][2])
)
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
## `.name_repair` is omitted as of tibble 2.0.0.
## ℹ Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## correlations and confidence limits side by side, then the display names;
## rows with any NA (e.g. no display name) are dropped
corr_output_tibble <- cbind(
  tibble(feature_names = corr_features, value = corr_all_features),
  corr_all_features_ci
)
corr_baseline_all_sites_names <- corr_output_tibble %>%
  full_join(plotting_names, by = "feature_names") %>%
  drop_na()
## stand-alone plot: features ordered by their correlation
corr_baseline_all_sites_names %>%
  mutate(plotting_name = fct_reorder(plotting_name, value, .fun = "max")) %>%
  ggplot(aes(x = plotting_name, y = value)) +
  geom_bar(stat = "identity", fill = "gray30", alpha = 0.7) +
  geom_errorbar(
    aes(x = plotting_name, ymin = low, ymax = upp),
    width = 0.4, colour = "black", alpha = 0.9, linewidth = 1.3
  ) +
  coord_flip() +
  theme_classic() +
  labs(y = "Correlation ", x = "") +
  theme(
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20),
    axis.text.y = element_text(size = 20),
    axis.text.x = element_text(size = 20)
  )
## version for the combined figure: same feature order as the PLS component plots
corr_baseline_all_sites_names_for_all <- corr_baseline_all_sites_names %>%
  mutate(plotting_name = factor(plotting_name,
                                levels = tidy_all_data_final_fit_baseline_reordered))
corr_bar_plot_baseline <- corr_baseline_all_sites_names_for_all %>%
  ggplot(aes(x = plotting_name, y = value)) +
  geom_bar(stat = "identity", fill = "gray40", alpha = 0.7) +
  geom_errorbar(
    aes(x = plotting_name, ymin = low, ymax = upp),
    width = 0.4, colour = "black", alpha = 0.9, linewidth = 1.3
  ) +
  scale_y_continuous(limits = c(-0.35, 0.45),
                     breaks = c(-0.35, 0, 0.45)) +
  coord_flip() +
  theme_classic() +
  labs(title = "Univariate \ncorrelations") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(size = 20, angle = 60, vjust = 0.5),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    legend.text = element_blank(),
    plot.title = element_text(size = 20),
    axis.ticks = element_blank()
  )
## component plots on the left, univariate correlations on the right
vi_pls_plot_baseline_all <- gridExtra::grid.arrange(
  comp_plots_all,
  corr_bar_plot_baseline,
  nrow = 1, ncol = 2, widths = c(7, 1)
)
vi_pls_plot_baseline_all
## TableGrob (1 x 2) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[arrange]
## 2 2 (1-1,2-2) arrange gtable[layout]
Model across all sites.
## the train and test tables of the first site split are stacked into one table
data_all_site_followup <- rbind(processed_ses_followup_train_select[[1]],
                                processed_ses_followup_test_select[[1]])
all_data_recipe_followup <- recipe_prep_scale(train_input = data_all_site_followup,
                                              features_input = dummy_features)
## follow the function use a more parsimonious model
## cut at the number of component that does not reduce 0.1% of the RMSE
all_data_fit_followup <- pls_tune(recipe_input = all_data_recipe_followup,
                                  feature_input = dummy_features)
all_data_wf_followup <- all_data_fit_followup[["pls_final_wf"]]
## only the parsnip model spec is taken from the tuned workflow and refit on
## the combined table with the formula gfactor ~ .
all_data_final_fit_followup <- all_data_wf_followup %>%
  parsnip::extract_spec_parsnip() %>%
  parsnip::fit(data = data_all_site_followup, formula = as.formula("gfactor~."))
tidy_all_data_final_fit_followup <- all_data_final_fit_followup %>%
  tidy()
all_data_param_followup <- all_data_fit_followup[["best_pls_model"]][["num_comp"]]
### extract the variance explained by each component
var_explained_followup <- all_data_final_fit_followup[["fit"]][["prop_expl_var"]][["X"]]
### plotting feature importance based on the model with all the data
## seq_len() instead of 1:n so the index can never count backwards
comp_idx_vec_followup <- seq_len(all_data_param_followup)
tidy_all_data_final_fit_followup <- tidy_all_data_final_fit_followup %>%
  rename(feature_names = term)
## attach the display names and drop the outcome rows; drop_na() mirrors the
## baseline pipeline so that features without a display name do not end up as
## an NA category in the plots
tidy_all_data_final_fit_followup_with_name <- full_join(tidy_all_data_final_fit_followup,
                                                        plotting_names, by = "feature_names") %>%
  filter(feature_names != "Y") %>% # outcome variable col name
  drop_na()
## one table (display name, value) per component
tidy_all_data_final_fit_followup_list <- purrr::map(comp_idx_vec_followup,
                                                    ~filter(tidy_all_data_final_fit_followup_with_name,
                                                            component == .) %>%
                                                      dplyr::select(all_of(c("plotting_name", "value"))))
### get the variable order from the first component:
tidy_all_data_final_fit_followup_reordered <- tidy_all_data_final_fit_followup_list[[1]] %>%
  arrange(value)
tidy_all_data_final_fit_followup_reordered <- tidy_all_data_final_fit_followup_reordered$plotting_name
## first component with labels, remaining components without
comp_one_plot_followup <- pls_vi_plot_with_label(data_input = tidy_all_data_final_fit_followup_list[[1]],
                                                 var_input = var_explained_followup[1],
                                                 idx_input = comp_idx_vec_followup[1],
                                                 reorder_name = tidy_all_data_final_fit_followup_reordered)
comp_other_plots_followup <- purrr::pmap(list(tidy_all_data_final_fit_followup_list[2:all_data_param_followup],
                                              var_explained_followup[2:all_data_param_followup],
                                              comp_idx_vec_followup[2:all_data_param_followup]),
                                         ~pls_vi_plot_no_label(data_input = ..1,
                                                               var_input = ..2,
                                                               idx_input = ..3,
                                                               reorder_name = tidy_all_data_final_fit_followup_reordered))
comp_other_plots_combined_followup <- ggpubr::ggarrange(plotlist = comp_other_plots_followup,
                                                        nrow = 1, ncol = length(var_explained_followup) - 1)
comp_plots_all_followup <- gridExtra::grid.arrange(comp_one_plot_followup,
                                                   comp_other_plots_combined_followup,
                                                   nrow = 1, ncol = 2, widths = c(4.5, 4))
comp_plots_all_followup
## TableGrob (1 x 2) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
The bar plot for correlations across all sites. Train and test fold are joined together.
## stack the train and test tables of the first site split
corr_data_all_followup <- rbind(processed_ses_followup_train[[1]], processed_ses_followup_test[[1]])
## Pearson correlation of every feature with gfactor, as a plain numeric vector
corr_all_features_followup <- unname(purrr::map_dbl(
  dummy_features,
  ~cor(corr_data_all_followup[[.x]], corr_data_all_followup[["gfactor"]])
))
corr_all_features_cor_test_followup <- purrr::map(.x = dummy_features,
                                                  ~cor.test(corr_data_all_followup[[.x]],
                                                            corr_data_all_followup[["gfactor"]], method = "pearson"))
## lower/upper confidence limits from each test; the columns are named
## directly instead of relying on as_tibble()'s deprecated auto-naming (V1, V2)
## of an unnamed matrix
corr_all_features_ci_followup <- tibble::tibble(
  low = purrr::map_dbl(corr_all_features_cor_test_followup, ~ .x[["conf.int"]][1]),
  upp = purrr::map_dbl(corr_all_features_cor_test_followup, ~ .x[["conf.int"]][2])
)
corr_output_tibble_followup <- tibble(feature_names = dummy_features, value = corr_all_features_followup)
corr_output_tibble_followup <- cbind(corr_output_tibble_followup, corr_all_features_ci_followup)
## attach the display names; rows with any NA (e.g. no display name) are dropped
corr_followup_all_sites_names <- full_join(corr_output_tibble_followup,
                                           plotting_names, by = "feature_names") %>% drop_na()
## stand-alone plot: features ordered by their correlation
corr_followup_all_sites_names %>%
  mutate(plotting_name = fct_reorder(plotting_name, value, .fun = "max")) %>%
  ggplot(aes(x = plotting_name, y = value)) +
  geom_bar(stat = "identity", fill = "gray30", alpha = 0.7) +
  geom_errorbar(aes(x = plotting_name,
                    ymin = low,
                    ymax = upp),
                width = 0.4, colour = "black", alpha = 0.9, linewidth = 1.3) +
  coord_flip() +
  theme_classic() +
  labs(y = paste0("Correlation "), x = "") +
  theme(axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        axis.text.y = element_text(size = 15),
        axis.text.x = element_text(size = 20))
## version for the combined figure: same feature order as the follow-up PLS plots
corr_followup_all_sites_names_for_all <- corr_followup_all_sites_names %>%
  mutate(plotting_name = factor(plotting_name,
                                levels = tidy_all_data_final_fit_followup_reordered))
corr_bar_plot_followup <- corr_followup_all_sites_names_for_all %>%
  #mutate(plotting_name = fct_reorder(plotting_name, value,.fun = "max"))%>%
  ggplot(aes(x = plotting_name, y = value)) +
  geom_bar(stat = "identity", fill = "gray40", alpha = 0.7) +
  geom_errorbar(aes(x = plotting_name,
                    ymin = low,
                    ymax = upp),
                width = 0.4, colour = "black", alpha = 0.9, linewidth = 1.3) +
  scale_y_continuous(limits = c(-0.35, 0.45),
                     breaks = c(-0.35, 0, 0.45)) + ##use this because all the plots should have the same height
  coord_flip() +
  theme_classic() +
  labs(title = "Univariate \ncorrelations") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(size = 20, angle = 60, vjust = 0.5),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    legend.text = element_blank(),
    plot.title = element_text(size = 20),
    axis.ticks = element_blank())
## component plots on the left, univariate correlations on the right
vi_pls_plot_followup_all <- gridExtra::grid.arrange(comp_plots_all_followup,
                                                    corr_bar_plot_followup,
                                                    nrow = 1, ncol = 2, widths = c(7, 1))
vi_pls_plot_followup_all
## TableGrob (1 x 2) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[arrange]
## 2 2 (1-1,2-2) arrange gtable[layout]
Combine baseline and followup together
## add a bold heading above each panel
vi_pls_plot_baseline_label <- ggpubr::annotate_figure(
  vi_pls_plot_baseline_all,
  top = ggpubr::text_grob("Baseline", size = 20, face = "bold", hjust = 3.2)
)
vi_pls_plot_followup_label <- ggpubr::annotate_figure(
  vi_pls_plot_followup_all,
  top = ggpubr::text_grob("Followup", size = 20, face = "bold", hjust = 3)
)
## stack the two panels and add the overall title
vi_pls_plot_label <- ggpubr::ggarrange(
  vi_pls_plot_baseline_label,
  vi_pls_plot_followup_label,
  nrow = 2
)
title_vi_pls_plot <- ggpubr::annotate_figure(
  vi_pls_plot_label,
  top = ggpubr::text_grob(
    "Feature importance of Partial Least Squares Regressions Predicting Cognitive \nAbilities from Social-Demographics, Lifestyles and Developments Variables",
    size = 25, face = "bold"
  )
)
title_vi_pls_plot
Change the order of the followup plots
### get the variable order from the first component:
comp_plots_baseline_followup <- purrr::pmap(list(tidy_all_data_final_fit_followup_list[1:all_data_param_followup],
var_explained_followup[1:all_data_param_followup],
comp_idx_vec_followup[1:all_data_param_followup]
),~pls_vi_plot_no_label(data_input=..1,
var_input = ..2,
idx_input = ..3,
reorder_name=tidy_all_data_final_fit_baseline_reordered))
comp_plots_combined_baseline_followup <- ggpubr::ggarrange(plotlist=comp_plots_baseline_followup,
nrow =1,ncol = length(var_explained_followup))
comp_plots_combined_baseline_followup
corr_followup_baseline_all_sites_names_for_all <-corr_followup_all_sites_names%>%
mutate(plotting_name = as.factor(plotting_name))%>%
mutate(plotting_name = factor(plotting_name,
levels =tidy_all_data_final_fit_baseline_reordered))
# Horizontal bar plot of `value` per `plotting_name`, with error bars running
# from `low` to `upp`. The y axis text/title are blanked.
# NOTE(review): presumably the y labels are hidden because the neighbouring
# panels already show them — confirm.
# (disabled alternative ordering:
#  mutate(plotting_name = fct_reorder(plotting_name, value, .fun = "max")))
corr_bar_plot_followup_baseline <- ggplot(
  corr_followup_baseline_all_sites_names_for_all,
  aes(x = plotting_name, y = value)
) +
  geom_bar(stat = "identity", fill = "gray40", alpha = 0.7) +
  # x is inherited from the plot-level aesthetics
  geom_errorbar(
    aes(ymin = low, ymax = upp),
    width = 0.4,
    colour = "black",
    alpha = 0.9,
    linewidth = 1.3
  ) +
  # fixed limits: use this because all the plots should have the same height
  scale_y_continuous(
    limits = c(-0.35, 0.45),
    breaks = c(-0.35, 0, 0.45)
  ) +
  coord_flip() +
  theme_classic() +
  labs(title = "Univariate \ncorrelations") +
  theme(
    plot.title = element_text(size = 15),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(size = 12, angle = 60, vjust = 0.5),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    legend.text = element_blank()
  )
# Lay out the re-ordered followup component plots and the matching bar plot in
# one row, with relative widths 8 : 1.4.
# grid.arrange() draws the layout and returns the arranged gtable.
vi_pls_plot_followup_baseline_all <- gridExtra::grid.arrange(
  grobs = list(
    comp_plots_combined_baseline_followup,
    corr_bar_plot_followup_baseline
  ),
  nrow = 1,
  ncol = 2,
  widths = c(8, 1.4)
)
# Evaluating the object at top level prints its TableGrob summary.
vi_pls_plot_followup_baseline_all
## TableGrob (1 x 2) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
# Add a (non-bold) heading above the baseline figure and above the followup
# figure.
# NOTE(review): despite the `_no_label` suffix these objects do carry a heading;
# presumably the suffix refers to the followup panels built with
# pls_vi_plot_no_label() — confirm.
vi_pls_plot_baseline_no_label <- ggpubr::annotate_figure(
  vi_pls_plot_baseline_all,
  top = ggpubr::text_grob("Baseline", size = 20, hjust = 2.5)
)
vi_pls_plot_followup_no_label <- ggpubr::annotate_figure(
  vi_pls_plot_followup_baseline_all,
  top = ggpubr::text_grob("Followup", size = 20, hjust = 2.5)
)

# Place baseline and followup side by side; baseline gets 1.5x the width.
# This overwrites any earlier `vi_pls_plot_label` / `title_vi_pls_plot`.
vi_pls_plot_label <- ggpubr::ggarrange(
  vi_pls_plot_baseline_no_label,
  vi_pls_plot_followup_no_label,
  ncol = 2,
  widths = c(1.5, 1)
)

# Overall title for the combined figure.
title_vi_pls_plot <- ggpubr::annotate_figure(
  vi_pls_plot_label,
  top = ggpubr::text_grob(
    "Feature importance of Partial Least Squares Regressions \nPredicting Cognitive Abilities from Social-Demographics, \nLifestyles and Developments Variables",
    size = 25,
    face = "bold"
  )
)
title_vi_pls_plot
# Bundle the train/test predictions and the processed train/test data for both
# events into one named list and write it to disk.
output_list <- list(
  # baseline
  baseline_train_pred = ses_pls_pred_baseline_train,
  baseline_test_pred = ses_pls_pred_baseline,
  baseline_train_data = processed_ses_baseline_train,
  baseline_test_data = processed_ses_baseline_test,
  # followup
  followup_train_pred = ses_pls_pred_followup_train,
  followup_test_pred = ses_pls_pred_followup,
  followup_train_data = processed_ses_followup_train,
  followup_test_data = processed_ses_followup_test
)
# NOTE: the file carries an ".RData" extension but is written with saveRDS(),
# so it must be read back with readRDS(), not load().
saveRDS(
  output_list,
  file = paste0(
    scriptfold,
    "genetics_psychopathology_common_scan_all_scripts/ses_pls_pred",
    ".RData"
  )
)
save the metrics
# Tag each metric table with its event, stack them, and label the modality.
# NOTE(review): `ses_baseline_metric_outout_table` ("outout") looks like a typo
# of "output"; the name is kept in case later code refers to it.
ses_baseline_metric_outout_table <- mutate(
  ses_baseline_metric_avg_table,
  event = "baseline"
)
ses_followup_metric_output_table <- mutate(
  ses_followup_metric_avg_table,
  event = "followup"
)
output_table <- mutate(
  bind_rows(
    ses_baseline_metric_outout_table,
    ses_followup_metric_output_table
  ),
  modality = "Social Demo Lifestyle Dev"
)
# NOTE: written with saveRDS() despite the ".RData" extension; read it back
# with readRDS(), not load().
saveRDS(
  output_table,
  file = paste0(
    scriptfold,
    "Common_psy_gene_brain_all/saved_outputs/performance_metrics/ses_performance_metric",
    ".RData"
  )
)
2.7 social demographics
Race/ethnicity is taken from ACS. The other social-demographic variables are: family income; family type; household size; parents’ work status (demo_prnt_empl_v2) [a bit too much to include]; parents’ education; and sumEcon_insecurities.
2.7.1 ABCD Parent Demographics Survey
Variable type: character
Variable type: logical
Variable type: numeric
Variable type: character
Variable type: logical
Variable type: numeric
Variable type: character
Variable type: factor
Variable type: numeric
Variable type: character
Variable type: factor
Variable type: numeric
2.7.2 more Social Demographics from Residential History Derived Scores
“RESHIST_ADDR1_ADI_WSUM”: Residential history derived - Area Deprivation Index, a scaled weighted sum based on Kind et al., Annals of Internal Medicine, 2014; “RESHIST_ADDR1_GRNDTOT”: the grand total of Uniform Crime Reports; “RESHIST_ADDR1_LEADRISK”: the estimated lead risk in the census tract of the primary residential address.
Variable type: character
Variable type: numeric
Variable type: character
Variable type: numeric